// SPDX-License-Identifier: GPL-2.0-only
/*
   drbd_receiver.c

   This file is part of DRBD by Philipp Reisner and Lars Ellenberg.

   Copyright (C) 2001-2008, LINBIT Information Technologies GmbH.
   Copyright (C) 1999-2008, Philipp Reisner <philipp.reisner@linbit.com>.
   Copyright (C) 2002-2008, Lars Ellenberg <lars.ellenberg@linbit.com>.

 */
1262306a36Sopenharmony_ci
1362306a36Sopenharmony_ci
1462306a36Sopenharmony_ci#include <linux/module.h>
1562306a36Sopenharmony_ci
1662306a36Sopenharmony_ci#include <linux/uaccess.h>
1762306a36Sopenharmony_ci#include <net/sock.h>
1862306a36Sopenharmony_ci
1962306a36Sopenharmony_ci#include <linux/drbd.h>
2062306a36Sopenharmony_ci#include <linux/fs.h>
2162306a36Sopenharmony_ci#include <linux/file.h>
2262306a36Sopenharmony_ci#include <linux/in.h>
2362306a36Sopenharmony_ci#include <linux/mm.h>
2462306a36Sopenharmony_ci#include <linux/memcontrol.h>
2562306a36Sopenharmony_ci#include <linux/mm_inline.h>
2662306a36Sopenharmony_ci#include <linux/slab.h>
2762306a36Sopenharmony_ci#include <uapi/linux/sched/types.h>
2862306a36Sopenharmony_ci#include <linux/sched/signal.h>
2962306a36Sopenharmony_ci#include <linux/pkt_sched.h>
3062306a36Sopenharmony_ci#include <linux/unistd.h>
3162306a36Sopenharmony_ci#include <linux/vmalloc.h>
3262306a36Sopenharmony_ci#include <linux/random.h>
3362306a36Sopenharmony_ci#include <linux/string.h>
3462306a36Sopenharmony_ci#include <linux/scatterlist.h>
3562306a36Sopenharmony_ci#include <linux/part_stat.h>
3662306a36Sopenharmony_ci#include "drbd_int.h"
3762306a36Sopenharmony_ci#include "drbd_protocol.h"
3862306a36Sopenharmony_ci#include "drbd_req.h"
3962306a36Sopenharmony_ci#include "drbd_vli.h"
4062306a36Sopenharmony_ci
/*
 * Union of the DRBD_FF_* feature flags named here.
 * NOTE(review): presumably the feature set offered to the peer during the
 * feature handshake (drbd_do_features) -- the user is not visible here.
 */
#define PRO_FEATURES \
	(DRBD_FF_TRIM | DRBD_FF_THIN_RESYNC | DRBD_FF_WSAME | DRBD_FF_WZEROES)
4262306a36Sopenharmony_ci
4362306a36Sopenharmony_cistruct packet_info {
4462306a36Sopenharmony_ci	enum drbd_packet cmd;
4562306a36Sopenharmony_ci	unsigned int size;
4662306a36Sopenharmony_ci	unsigned int vnr;
4762306a36Sopenharmony_ci	void *data;
4862306a36Sopenharmony_ci};
4962306a36Sopenharmony_ci
/* Result codes returned by drbd_may_finish_epoch(). */
enum finish_epoch {
	FE_STILL_LIVE = 0,
	FE_DESTROYED = 1,
	FE_RECYCLED = 2,
};
5562306a36Sopenharmony_ci
/* Forward declarations for static functions defined later in this file. */
static int drbd_do_features(struct drbd_connection *connection);
static int drbd_do_auth(struct drbd_connection *connection);
static int drbd_disconnected(struct drbd_peer_device *peer_device);
static void conn_wait_active_ee_empty(struct drbd_connection *connection);
static enum finish_epoch drbd_may_finish_epoch(struct drbd_connection *connection,
					       struct drbd_epoch *epoch,
					       enum epoch_event ev);
static int e_end_block(struct drbd_work *w, int cancel);
6262306a36Sopenharmony_ci
6362306a36Sopenharmony_ci
/*
 * Allocation flags for opportunistic page allocation in this file:
 * highmem pages are acceptable and allocation-failure warnings are muted.
 */
#define GFP_TRY (__GFP_HIGHMEM | __GFP_NOWARN)
6562306a36Sopenharmony_ci
/*
 * Some helper functions to deal with singly linked page lists,
 * page->private being our "next" pointer.
 */
7062306a36Sopenharmony_ci
7162306a36Sopenharmony_ci/* If at least n pages are linked at head, get n pages off.
7262306a36Sopenharmony_ci * Otherwise, don't modify head, and return NULL.
7362306a36Sopenharmony_ci * Locking is the responsibility of the caller.
7462306a36Sopenharmony_ci */
7562306a36Sopenharmony_cistatic struct page *page_chain_del(struct page **head, int n)
7662306a36Sopenharmony_ci{
7762306a36Sopenharmony_ci	struct page *page;
7862306a36Sopenharmony_ci	struct page *tmp;
7962306a36Sopenharmony_ci
8062306a36Sopenharmony_ci	BUG_ON(!n);
8162306a36Sopenharmony_ci	BUG_ON(!head);
8262306a36Sopenharmony_ci
8362306a36Sopenharmony_ci	page = *head;
8462306a36Sopenharmony_ci
8562306a36Sopenharmony_ci	if (!page)
8662306a36Sopenharmony_ci		return NULL;
8762306a36Sopenharmony_ci
8862306a36Sopenharmony_ci	while (page) {
8962306a36Sopenharmony_ci		tmp = page_chain_next(page);
9062306a36Sopenharmony_ci		if (--n == 0)
9162306a36Sopenharmony_ci			break; /* found sufficient pages */
9262306a36Sopenharmony_ci		if (tmp == NULL)
9362306a36Sopenharmony_ci			/* insufficient pages, don't use any of them. */
9462306a36Sopenharmony_ci			return NULL;
9562306a36Sopenharmony_ci		page = tmp;
9662306a36Sopenharmony_ci	}
9762306a36Sopenharmony_ci
9862306a36Sopenharmony_ci	/* add end of list marker for the returned list */
9962306a36Sopenharmony_ci	set_page_private(page, 0);
10062306a36Sopenharmony_ci	/* actual return value, and adjustment of head */
10162306a36Sopenharmony_ci	page = *head;
10262306a36Sopenharmony_ci	*head = tmp;
10362306a36Sopenharmony_ci	return page;
10462306a36Sopenharmony_ci}
10562306a36Sopenharmony_ci
10662306a36Sopenharmony_ci/* may be used outside of locks to find the tail of a (usually short)
10762306a36Sopenharmony_ci * "private" page chain, before adding it back to a global chain head
10862306a36Sopenharmony_ci * with page_chain_add() under a spinlock. */
10962306a36Sopenharmony_cistatic struct page *page_chain_tail(struct page *page, int *len)
11062306a36Sopenharmony_ci{
11162306a36Sopenharmony_ci	struct page *tmp;
11262306a36Sopenharmony_ci	int i = 1;
11362306a36Sopenharmony_ci	while ((tmp = page_chain_next(page))) {
11462306a36Sopenharmony_ci		++i;
11562306a36Sopenharmony_ci		page = tmp;
11662306a36Sopenharmony_ci	}
11762306a36Sopenharmony_ci	if (len)
11862306a36Sopenharmony_ci		*len = i;
11962306a36Sopenharmony_ci	return page;
12062306a36Sopenharmony_ci}
12162306a36Sopenharmony_ci
/*
 * Call put_page() on every page of the chain starting at @page.
 * Returns the number of pages in the chain (0 for a NULL chain).
 */
static int page_chain_free(struct page *page)
{
	int freed = 0;

	while (page) {
		/* fetch the successor before letting go of this page */
		struct page *next = page_chain_next(page);

		put_page(page);
		freed++;
		page = next;
	}
	return freed;
}
13262306a36Sopenharmony_ci
13362306a36Sopenharmony_cistatic void page_chain_add(struct page **head,
13462306a36Sopenharmony_ci		struct page *chain_first, struct page *chain_last)
13562306a36Sopenharmony_ci{
13662306a36Sopenharmony_ci#if 1
13762306a36Sopenharmony_ci	struct page *tmp;
13862306a36Sopenharmony_ci	tmp = page_chain_tail(chain_first, NULL);
13962306a36Sopenharmony_ci	BUG_ON(tmp != chain_last);
14062306a36Sopenharmony_ci#endif
14162306a36Sopenharmony_ci
14262306a36Sopenharmony_ci	/* add chain to head */
14362306a36Sopenharmony_ci	set_page_private(chain_last, (unsigned long)*head);
14462306a36Sopenharmony_ci	*head = chain_first;
14562306a36Sopenharmony_ci}
14662306a36Sopenharmony_ci
14762306a36Sopenharmony_cistatic struct page *__drbd_alloc_pages(struct drbd_device *device,
14862306a36Sopenharmony_ci				       unsigned int number)
14962306a36Sopenharmony_ci{
15062306a36Sopenharmony_ci	struct page *page = NULL;
15162306a36Sopenharmony_ci	struct page *tmp = NULL;
15262306a36Sopenharmony_ci	unsigned int i = 0;
15362306a36Sopenharmony_ci
15462306a36Sopenharmony_ci	/* Yes, testing drbd_pp_vacant outside the lock is racy.
15562306a36Sopenharmony_ci	 * So what. It saves a spin_lock. */
15662306a36Sopenharmony_ci	if (drbd_pp_vacant >= number) {
15762306a36Sopenharmony_ci		spin_lock(&drbd_pp_lock);
15862306a36Sopenharmony_ci		page = page_chain_del(&drbd_pp_pool, number);
15962306a36Sopenharmony_ci		if (page)
16062306a36Sopenharmony_ci			drbd_pp_vacant -= number;
16162306a36Sopenharmony_ci		spin_unlock(&drbd_pp_lock);
16262306a36Sopenharmony_ci		if (page)
16362306a36Sopenharmony_ci			return page;
16462306a36Sopenharmony_ci	}
16562306a36Sopenharmony_ci
16662306a36Sopenharmony_ci	/* GFP_TRY, because we must not cause arbitrary write-out: in a DRBD
16762306a36Sopenharmony_ci	 * "criss-cross" setup, that might cause write-out on some other DRBD,
16862306a36Sopenharmony_ci	 * which in turn might block on the other node at this very place.  */
16962306a36Sopenharmony_ci	for (i = 0; i < number; i++) {
17062306a36Sopenharmony_ci		tmp = alloc_page(GFP_TRY);
17162306a36Sopenharmony_ci		if (!tmp)
17262306a36Sopenharmony_ci			break;
17362306a36Sopenharmony_ci		set_page_private(tmp, (unsigned long)page);
17462306a36Sopenharmony_ci		page = tmp;
17562306a36Sopenharmony_ci	}
17662306a36Sopenharmony_ci
17762306a36Sopenharmony_ci	if (i == number)
17862306a36Sopenharmony_ci		return page;
17962306a36Sopenharmony_ci
18062306a36Sopenharmony_ci	/* Not enough pages immediately available this time.
18162306a36Sopenharmony_ci	 * No need to jump around here, drbd_alloc_pages will retry this
18262306a36Sopenharmony_ci	 * function "soon". */
18362306a36Sopenharmony_ci	if (page) {
18462306a36Sopenharmony_ci		tmp = page_chain_tail(page, NULL);
18562306a36Sopenharmony_ci		spin_lock(&drbd_pp_lock);
18662306a36Sopenharmony_ci		page_chain_add(&drbd_pp_pool, page, tmp);
18762306a36Sopenharmony_ci		drbd_pp_vacant += i;
18862306a36Sopenharmony_ci		spin_unlock(&drbd_pp_lock);
18962306a36Sopenharmony_ci	}
19062306a36Sopenharmony_ci	return NULL;
19162306a36Sopenharmony_ci}
19262306a36Sopenharmony_ci
19362306a36Sopenharmony_cistatic void reclaim_finished_net_peer_reqs(struct drbd_device *device,
19462306a36Sopenharmony_ci					   struct list_head *to_be_freed)
19562306a36Sopenharmony_ci{
19662306a36Sopenharmony_ci	struct drbd_peer_request *peer_req, *tmp;
19762306a36Sopenharmony_ci
19862306a36Sopenharmony_ci	/* The EEs are always appended to the end of the list. Since
19962306a36Sopenharmony_ci	   they are sent in order over the wire, they have to finish
20062306a36Sopenharmony_ci	   in order. As soon as we see the first not finished we can
20162306a36Sopenharmony_ci	   stop to examine the list... */
20262306a36Sopenharmony_ci
20362306a36Sopenharmony_ci	list_for_each_entry_safe(peer_req, tmp, &device->net_ee, w.list) {
20462306a36Sopenharmony_ci		if (drbd_peer_req_has_active_page(peer_req))
20562306a36Sopenharmony_ci			break;
20662306a36Sopenharmony_ci		list_move(&peer_req->w.list, to_be_freed);
20762306a36Sopenharmony_ci	}
20862306a36Sopenharmony_ci}
20962306a36Sopenharmony_ci
21062306a36Sopenharmony_cistatic void drbd_reclaim_net_peer_reqs(struct drbd_device *device)
21162306a36Sopenharmony_ci{
21262306a36Sopenharmony_ci	LIST_HEAD(reclaimed);
21362306a36Sopenharmony_ci	struct drbd_peer_request *peer_req, *t;
21462306a36Sopenharmony_ci
21562306a36Sopenharmony_ci	spin_lock_irq(&device->resource->req_lock);
21662306a36Sopenharmony_ci	reclaim_finished_net_peer_reqs(device, &reclaimed);
21762306a36Sopenharmony_ci	spin_unlock_irq(&device->resource->req_lock);
21862306a36Sopenharmony_ci	list_for_each_entry_safe(peer_req, t, &reclaimed, w.list)
21962306a36Sopenharmony_ci		drbd_free_net_peer_req(device, peer_req);
22062306a36Sopenharmony_ci}
22162306a36Sopenharmony_ci
22262306a36Sopenharmony_cistatic void conn_reclaim_net_peer_reqs(struct drbd_connection *connection)
22362306a36Sopenharmony_ci{
22462306a36Sopenharmony_ci	struct drbd_peer_device *peer_device;
22562306a36Sopenharmony_ci	int vnr;
22662306a36Sopenharmony_ci
22762306a36Sopenharmony_ci	rcu_read_lock();
22862306a36Sopenharmony_ci	idr_for_each_entry(&connection->peer_devices, peer_device, vnr) {
22962306a36Sopenharmony_ci		struct drbd_device *device = peer_device->device;
23062306a36Sopenharmony_ci		if (!atomic_read(&device->pp_in_use_by_net))
23162306a36Sopenharmony_ci			continue;
23262306a36Sopenharmony_ci
23362306a36Sopenharmony_ci		kref_get(&device->kref);
23462306a36Sopenharmony_ci		rcu_read_unlock();
23562306a36Sopenharmony_ci		drbd_reclaim_net_peer_reqs(device);
23662306a36Sopenharmony_ci		kref_put(&device->kref, drbd_destroy_device);
23762306a36Sopenharmony_ci		rcu_read_lock();
23862306a36Sopenharmony_ci	}
23962306a36Sopenharmony_ci	rcu_read_unlock();
24062306a36Sopenharmony_ci}
24162306a36Sopenharmony_ci
24262306a36Sopenharmony_ci/**
24362306a36Sopenharmony_ci * drbd_alloc_pages() - Returns @number pages, retries forever (or until signalled)
24462306a36Sopenharmony_ci * @peer_device:	DRBD device.
24562306a36Sopenharmony_ci * @number:		number of pages requested
24662306a36Sopenharmony_ci * @retry:		whether to retry, if not enough pages are available right now
24762306a36Sopenharmony_ci *
24862306a36Sopenharmony_ci * Tries to allocate number pages, first from our own page pool, then from
24962306a36Sopenharmony_ci * the kernel.
25062306a36Sopenharmony_ci * Possibly retry until DRBD frees sufficient pages somewhere else.
25162306a36Sopenharmony_ci *
25262306a36Sopenharmony_ci * If this allocation would exceed the max_buffers setting, we throttle
25362306a36Sopenharmony_ci * allocation (schedule_timeout) to give the system some room to breathe.
25462306a36Sopenharmony_ci *
25562306a36Sopenharmony_ci * We do not use max-buffers as hard limit, because it could lead to
25662306a36Sopenharmony_ci * congestion and further to a distributed deadlock during online-verify or
25762306a36Sopenharmony_ci * (checksum based) resync, if the max-buffers, socket buffer sizes and
25862306a36Sopenharmony_ci * resync-rate settings are mis-configured.
25962306a36Sopenharmony_ci *
26062306a36Sopenharmony_ci * Returns a page chain linked via page->private.
26162306a36Sopenharmony_ci */
26262306a36Sopenharmony_cistruct page *drbd_alloc_pages(struct drbd_peer_device *peer_device, unsigned int number,
26362306a36Sopenharmony_ci			      bool retry)
26462306a36Sopenharmony_ci{
26562306a36Sopenharmony_ci	struct drbd_device *device = peer_device->device;
26662306a36Sopenharmony_ci	struct page *page = NULL;
26762306a36Sopenharmony_ci	struct net_conf *nc;
26862306a36Sopenharmony_ci	DEFINE_WAIT(wait);
26962306a36Sopenharmony_ci	unsigned int mxb;
27062306a36Sopenharmony_ci
27162306a36Sopenharmony_ci	rcu_read_lock();
27262306a36Sopenharmony_ci	nc = rcu_dereference(peer_device->connection->net_conf);
27362306a36Sopenharmony_ci	mxb = nc ? nc->max_buffers : 1000000;
27462306a36Sopenharmony_ci	rcu_read_unlock();
27562306a36Sopenharmony_ci
27662306a36Sopenharmony_ci	if (atomic_read(&device->pp_in_use) < mxb)
27762306a36Sopenharmony_ci		page = __drbd_alloc_pages(device, number);
27862306a36Sopenharmony_ci
27962306a36Sopenharmony_ci	/* Try to keep the fast path fast, but occasionally we need
28062306a36Sopenharmony_ci	 * to reclaim the pages we lended to the network stack. */
28162306a36Sopenharmony_ci	if (page && atomic_read(&device->pp_in_use_by_net) > 512)
28262306a36Sopenharmony_ci		drbd_reclaim_net_peer_reqs(device);
28362306a36Sopenharmony_ci
28462306a36Sopenharmony_ci	while (page == NULL) {
28562306a36Sopenharmony_ci		prepare_to_wait(&drbd_pp_wait, &wait, TASK_INTERRUPTIBLE);
28662306a36Sopenharmony_ci
28762306a36Sopenharmony_ci		drbd_reclaim_net_peer_reqs(device);
28862306a36Sopenharmony_ci
28962306a36Sopenharmony_ci		if (atomic_read(&device->pp_in_use) < mxb) {
29062306a36Sopenharmony_ci			page = __drbd_alloc_pages(device, number);
29162306a36Sopenharmony_ci			if (page)
29262306a36Sopenharmony_ci				break;
29362306a36Sopenharmony_ci		}
29462306a36Sopenharmony_ci
29562306a36Sopenharmony_ci		if (!retry)
29662306a36Sopenharmony_ci			break;
29762306a36Sopenharmony_ci
29862306a36Sopenharmony_ci		if (signal_pending(current)) {
29962306a36Sopenharmony_ci			drbd_warn(device, "drbd_alloc_pages interrupted!\n");
30062306a36Sopenharmony_ci			break;
30162306a36Sopenharmony_ci		}
30262306a36Sopenharmony_ci
30362306a36Sopenharmony_ci		if (schedule_timeout(HZ/10) == 0)
30462306a36Sopenharmony_ci			mxb = UINT_MAX;
30562306a36Sopenharmony_ci	}
30662306a36Sopenharmony_ci	finish_wait(&drbd_pp_wait, &wait);
30762306a36Sopenharmony_ci
30862306a36Sopenharmony_ci	if (page)
30962306a36Sopenharmony_ci		atomic_add(number, &device->pp_in_use);
31062306a36Sopenharmony_ci	return page;
31162306a36Sopenharmony_ci}
31262306a36Sopenharmony_ci
31362306a36Sopenharmony_ci/* Must not be used from irq, as that may deadlock: see drbd_alloc_pages.
31462306a36Sopenharmony_ci * Is also used from inside an other spin_lock_irq(&resource->req_lock);
31562306a36Sopenharmony_ci * Either links the page chain back to the global pool,
31662306a36Sopenharmony_ci * or returns all pages to the system. */
31762306a36Sopenharmony_cistatic void drbd_free_pages(struct drbd_device *device, struct page *page, int is_net)
31862306a36Sopenharmony_ci{
31962306a36Sopenharmony_ci	atomic_t *a = is_net ? &device->pp_in_use_by_net : &device->pp_in_use;
32062306a36Sopenharmony_ci	int i;
32162306a36Sopenharmony_ci
32262306a36Sopenharmony_ci	if (page == NULL)
32362306a36Sopenharmony_ci		return;
32462306a36Sopenharmony_ci
32562306a36Sopenharmony_ci	if (drbd_pp_vacant > (DRBD_MAX_BIO_SIZE/PAGE_SIZE) * drbd_minor_count)
32662306a36Sopenharmony_ci		i = page_chain_free(page);
32762306a36Sopenharmony_ci	else {
32862306a36Sopenharmony_ci		struct page *tmp;
32962306a36Sopenharmony_ci		tmp = page_chain_tail(page, &i);
33062306a36Sopenharmony_ci		spin_lock(&drbd_pp_lock);
33162306a36Sopenharmony_ci		page_chain_add(&drbd_pp_pool, page, tmp);
33262306a36Sopenharmony_ci		drbd_pp_vacant += i;
33362306a36Sopenharmony_ci		spin_unlock(&drbd_pp_lock);
33462306a36Sopenharmony_ci	}
33562306a36Sopenharmony_ci	i = atomic_sub_return(i, a);
33662306a36Sopenharmony_ci	if (i < 0)
33762306a36Sopenharmony_ci		drbd_warn(device, "ASSERTION FAILED: %s: %d < 0\n",
33862306a36Sopenharmony_ci			is_net ? "pp_in_use_by_net" : "pp_in_use", i);
33962306a36Sopenharmony_ci	wake_up(&drbd_pp_wait);
34062306a36Sopenharmony_ci}
34162306a36Sopenharmony_ci
/*
 * You need to hold the req_lock when calling:
 *  _drbd_wait_ee_list_empty()
 *
 * You must not hold the req_lock when calling:
 *  drbd_free_peer_req()
 *  drbd_alloc_peer_req()
 *  drbd_free_peer_reqs()
 *  drbd_ee_fix_bhs()
 *  drbd_finish_peer_reqs()
 *  drbd_clear_done_ee()
 *  drbd_wait_ee_list_empty()
 */
35562306a36Sopenharmony_ci
35662306a36Sopenharmony_ci/* normal: payload_size == request size (bi_size)
35762306a36Sopenharmony_ci * w_same: payload_size == logical_block_size
35862306a36Sopenharmony_ci * trim: payload_size == 0 */
35962306a36Sopenharmony_cistruct drbd_peer_request *
36062306a36Sopenharmony_cidrbd_alloc_peer_req(struct drbd_peer_device *peer_device, u64 id, sector_t sector,
36162306a36Sopenharmony_ci		    unsigned int request_size, unsigned int payload_size, gfp_t gfp_mask) __must_hold(local)
36262306a36Sopenharmony_ci{
36362306a36Sopenharmony_ci	struct drbd_device *device = peer_device->device;
36462306a36Sopenharmony_ci	struct drbd_peer_request *peer_req;
36562306a36Sopenharmony_ci	struct page *page = NULL;
36662306a36Sopenharmony_ci	unsigned int nr_pages = PFN_UP(payload_size);
36762306a36Sopenharmony_ci
36862306a36Sopenharmony_ci	if (drbd_insert_fault(device, DRBD_FAULT_AL_EE))
36962306a36Sopenharmony_ci		return NULL;
37062306a36Sopenharmony_ci
37162306a36Sopenharmony_ci	peer_req = mempool_alloc(&drbd_ee_mempool, gfp_mask & ~__GFP_HIGHMEM);
37262306a36Sopenharmony_ci	if (!peer_req) {
37362306a36Sopenharmony_ci		if (!(gfp_mask & __GFP_NOWARN))
37462306a36Sopenharmony_ci			drbd_err(device, "%s: allocation failed\n", __func__);
37562306a36Sopenharmony_ci		return NULL;
37662306a36Sopenharmony_ci	}
37762306a36Sopenharmony_ci
37862306a36Sopenharmony_ci	if (nr_pages) {
37962306a36Sopenharmony_ci		page = drbd_alloc_pages(peer_device, nr_pages,
38062306a36Sopenharmony_ci					gfpflags_allow_blocking(gfp_mask));
38162306a36Sopenharmony_ci		if (!page)
38262306a36Sopenharmony_ci			goto fail;
38362306a36Sopenharmony_ci	}
38462306a36Sopenharmony_ci
38562306a36Sopenharmony_ci	memset(peer_req, 0, sizeof(*peer_req));
38662306a36Sopenharmony_ci	INIT_LIST_HEAD(&peer_req->w.list);
38762306a36Sopenharmony_ci	drbd_clear_interval(&peer_req->i);
38862306a36Sopenharmony_ci	peer_req->i.size = request_size;
38962306a36Sopenharmony_ci	peer_req->i.sector = sector;
39062306a36Sopenharmony_ci	peer_req->submit_jif = jiffies;
39162306a36Sopenharmony_ci	peer_req->peer_device = peer_device;
39262306a36Sopenharmony_ci	peer_req->pages = page;
39362306a36Sopenharmony_ci	/*
39462306a36Sopenharmony_ci	 * The block_id is opaque to the receiver.  It is not endianness
39562306a36Sopenharmony_ci	 * converted, and sent back to the sender unchanged.
39662306a36Sopenharmony_ci	 */
39762306a36Sopenharmony_ci	peer_req->block_id = id;
39862306a36Sopenharmony_ci
39962306a36Sopenharmony_ci	return peer_req;
40062306a36Sopenharmony_ci
40162306a36Sopenharmony_ci fail:
40262306a36Sopenharmony_ci	mempool_free(peer_req, &drbd_ee_mempool);
40362306a36Sopenharmony_ci	return NULL;
40462306a36Sopenharmony_ci}
40562306a36Sopenharmony_ci
40662306a36Sopenharmony_civoid __drbd_free_peer_req(struct drbd_device *device, struct drbd_peer_request *peer_req,
40762306a36Sopenharmony_ci		       int is_net)
40862306a36Sopenharmony_ci{
40962306a36Sopenharmony_ci	might_sleep();
41062306a36Sopenharmony_ci	if (peer_req->flags & EE_HAS_DIGEST)
41162306a36Sopenharmony_ci		kfree(peer_req->digest);
41262306a36Sopenharmony_ci	drbd_free_pages(device, peer_req->pages, is_net);
41362306a36Sopenharmony_ci	D_ASSERT(device, atomic_read(&peer_req->pending_bios) == 0);
41462306a36Sopenharmony_ci	D_ASSERT(device, drbd_interval_empty(&peer_req->i));
41562306a36Sopenharmony_ci	if (!expect(device, !(peer_req->flags & EE_CALL_AL_COMPLETE_IO))) {
41662306a36Sopenharmony_ci		peer_req->flags &= ~EE_CALL_AL_COMPLETE_IO;
41762306a36Sopenharmony_ci		drbd_al_complete_io(device, &peer_req->i);
41862306a36Sopenharmony_ci	}
41962306a36Sopenharmony_ci	mempool_free(peer_req, &drbd_ee_mempool);
42062306a36Sopenharmony_ci}
42162306a36Sopenharmony_ci
42262306a36Sopenharmony_ciint drbd_free_peer_reqs(struct drbd_device *device, struct list_head *list)
42362306a36Sopenharmony_ci{
42462306a36Sopenharmony_ci	LIST_HEAD(work_list);
42562306a36Sopenharmony_ci	struct drbd_peer_request *peer_req, *t;
42662306a36Sopenharmony_ci	int count = 0;
42762306a36Sopenharmony_ci	int is_net = list == &device->net_ee;
42862306a36Sopenharmony_ci
42962306a36Sopenharmony_ci	spin_lock_irq(&device->resource->req_lock);
43062306a36Sopenharmony_ci	list_splice_init(list, &work_list);
43162306a36Sopenharmony_ci	spin_unlock_irq(&device->resource->req_lock);
43262306a36Sopenharmony_ci
43362306a36Sopenharmony_ci	list_for_each_entry_safe(peer_req, t, &work_list, w.list) {
43462306a36Sopenharmony_ci		__drbd_free_peer_req(device, peer_req, is_net);
43562306a36Sopenharmony_ci		count++;
43662306a36Sopenharmony_ci	}
43762306a36Sopenharmony_ci	return count;
43862306a36Sopenharmony_ci}
43962306a36Sopenharmony_ci
44062306a36Sopenharmony_ci/*
44162306a36Sopenharmony_ci * See also comments in _req_mod(,BARRIER_ACKED) and receive_Barrier.
44262306a36Sopenharmony_ci */
44362306a36Sopenharmony_cistatic int drbd_finish_peer_reqs(struct drbd_device *device)
44462306a36Sopenharmony_ci{
44562306a36Sopenharmony_ci	LIST_HEAD(work_list);
44662306a36Sopenharmony_ci	LIST_HEAD(reclaimed);
44762306a36Sopenharmony_ci	struct drbd_peer_request *peer_req, *t;
44862306a36Sopenharmony_ci	int err = 0;
44962306a36Sopenharmony_ci
45062306a36Sopenharmony_ci	spin_lock_irq(&device->resource->req_lock);
45162306a36Sopenharmony_ci	reclaim_finished_net_peer_reqs(device, &reclaimed);
45262306a36Sopenharmony_ci	list_splice_init(&device->done_ee, &work_list);
45362306a36Sopenharmony_ci	spin_unlock_irq(&device->resource->req_lock);
45462306a36Sopenharmony_ci
45562306a36Sopenharmony_ci	list_for_each_entry_safe(peer_req, t, &reclaimed, w.list)
45662306a36Sopenharmony_ci		drbd_free_net_peer_req(device, peer_req);
45762306a36Sopenharmony_ci
45862306a36Sopenharmony_ci	/* possible callbacks here:
45962306a36Sopenharmony_ci	 * e_end_block, and e_end_resync_block, e_send_superseded.
46062306a36Sopenharmony_ci	 * all ignore the last argument.
46162306a36Sopenharmony_ci	 */
46262306a36Sopenharmony_ci	list_for_each_entry_safe(peer_req, t, &work_list, w.list) {
46362306a36Sopenharmony_ci		int err2;
46462306a36Sopenharmony_ci
46562306a36Sopenharmony_ci		/* list_del not necessary, next/prev members not touched */
46662306a36Sopenharmony_ci		err2 = peer_req->w.cb(&peer_req->w, !!err);
46762306a36Sopenharmony_ci		if (!err)
46862306a36Sopenharmony_ci			err = err2;
46962306a36Sopenharmony_ci		drbd_free_peer_req(device, peer_req);
47062306a36Sopenharmony_ci	}
47162306a36Sopenharmony_ci	wake_up(&device->ee_wait);
47262306a36Sopenharmony_ci
47362306a36Sopenharmony_ci	return err;
47462306a36Sopenharmony_ci}
47562306a36Sopenharmony_ci
47662306a36Sopenharmony_cistatic void _drbd_wait_ee_list_empty(struct drbd_device *device,
47762306a36Sopenharmony_ci				     struct list_head *head)
47862306a36Sopenharmony_ci{
47962306a36Sopenharmony_ci	DEFINE_WAIT(wait);
48062306a36Sopenharmony_ci
48162306a36Sopenharmony_ci	/* avoids spin_lock/unlock
48262306a36Sopenharmony_ci	 * and calling prepare_to_wait in the fast path */
48362306a36Sopenharmony_ci	while (!list_empty(head)) {
48462306a36Sopenharmony_ci		prepare_to_wait(&device->ee_wait, &wait, TASK_UNINTERRUPTIBLE);
48562306a36Sopenharmony_ci		spin_unlock_irq(&device->resource->req_lock);
48662306a36Sopenharmony_ci		io_schedule();
48762306a36Sopenharmony_ci		finish_wait(&device->ee_wait, &wait);
48862306a36Sopenharmony_ci		spin_lock_irq(&device->resource->req_lock);
48962306a36Sopenharmony_ci	}
49062306a36Sopenharmony_ci}
49162306a36Sopenharmony_ci
49262306a36Sopenharmony_cistatic void drbd_wait_ee_list_empty(struct drbd_device *device,
49362306a36Sopenharmony_ci				    struct list_head *head)
49462306a36Sopenharmony_ci{
49562306a36Sopenharmony_ci	spin_lock_irq(&device->resource->req_lock);
49662306a36Sopenharmony_ci	_drbd_wait_ee_list_empty(device, head);
49762306a36Sopenharmony_ci	spin_unlock_irq(&device->resource->req_lock);
49862306a36Sopenharmony_ci}
49962306a36Sopenharmony_ci
50062306a36Sopenharmony_cistatic int drbd_recv_short(struct socket *sock, void *buf, size_t size, int flags)
50162306a36Sopenharmony_ci{
50262306a36Sopenharmony_ci	struct kvec iov = {
50362306a36Sopenharmony_ci		.iov_base = buf,
50462306a36Sopenharmony_ci		.iov_len = size,
50562306a36Sopenharmony_ci	};
50662306a36Sopenharmony_ci	struct msghdr msg = {
50762306a36Sopenharmony_ci		.msg_flags = (flags ? flags : MSG_WAITALL | MSG_NOSIGNAL)
50862306a36Sopenharmony_ci	};
50962306a36Sopenharmony_ci	iov_iter_kvec(&msg.msg_iter, ITER_DEST, &iov, 1, size);
51062306a36Sopenharmony_ci	return sock_recvmsg(sock, &msg, msg.msg_flags);
51162306a36Sopenharmony_ci}
51262306a36Sopenharmony_ci
51362306a36Sopenharmony_cistatic int drbd_recv(struct drbd_connection *connection, void *buf, size_t size)
51462306a36Sopenharmony_ci{
51562306a36Sopenharmony_ci	int rv;
51662306a36Sopenharmony_ci
51762306a36Sopenharmony_ci	rv = drbd_recv_short(connection->data.socket, buf, size, 0);
51862306a36Sopenharmony_ci
51962306a36Sopenharmony_ci	if (rv < 0) {
52062306a36Sopenharmony_ci		if (rv == -ECONNRESET)
52162306a36Sopenharmony_ci			drbd_info(connection, "sock was reset by peer\n");
52262306a36Sopenharmony_ci		else if (rv != -ERESTARTSYS)
52362306a36Sopenharmony_ci			drbd_err(connection, "sock_recvmsg returned %d\n", rv);
52462306a36Sopenharmony_ci	} else if (rv == 0) {
52562306a36Sopenharmony_ci		if (test_bit(DISCONNECT_SENT, &connection->flags)) {
52662306a36Sopenharmony_ci			long t;
52762306a36Sopenharmony_ci			rcu_read_lock();
52862306a36Sopenharmony_ci			t = rcu_dereference(connection->net_conf)->ping_timeo * HZ/10;
52962306a36Sopenharmony_ci			rcu_read_unlock();
53062306a36Sopenharmony_ci
53162306a36Sopenharmony_ci			t = wait_event_timeout(connection->ping_wait, connection->cstate < C_WF_REPORT_PARAMS, t);
53262306a36Sopenharmony_ci
53362306a36Sopenharmony_ci			if (t)
53462306a36Sopenharmony_ci				goto out;
53562306a36Sopenharmony_ci		}
53662306a36Sopenharmony_ci		drbd_info(connection, "sock was shut down by peer\n");
53762306a36Sopenharmony_ci	}
53862306a36Sopenharmony_ci
53962306a36Sopenharmony_ci	if (rv != size)
54062306a36Sopenharmony_ci		conn_request_state(connection, NS(conn, C_BROKEN_PIPE), CS_HARD);
54162306a36Sopenharmony_ci
54262306a36Sopenharmony_ciout:
54362306a36Sopenharmony_ci	return rv;
54462306a36Sopenharmony_ci}
54562306a36Sopenharmony_ci
54662306a36Sopenharmony_cistatic int drbd_recv_all(struct drbd_connection *connection, void *buf, size_t size)
54762306a36Sopenharmony_ci{
54862306a36Sopenharmony_ci	int err;
54962306a36Sopenharmony_ci
55062306a36Sopenharmony_ci	err = drbd_recv(connection, buf, size);
55162306a36Sopenharmony_ci	if (err != size) {
55262306a36Sopenharmony_ci		if (err >= 0)
55362306a36Sopenharmony_ci			err = -EIO;
55462306a36Sopenharmony_ci	} else
55562306a36Sopenharmony_ci		err = 0;
55662306a36Sopenharmony_ci	return err;
55762306a36Sopenharmony_ci}
55862306a36Sopenharmony_ci
55962306a36Sopenharmony_cistatic int drbd_recv_all_warn(struct drbd_connection *connection, void *buf, size_t size)
56062306a36Sopenharmony_ci{
56162306a36Sopenharmony_ci	int err;
56262306a36Sopenharmony_ci
56362306a36Sopenharmony_ci	err = drbd_recv_all(connection, buf, size);
56462306a36Sopenharmony_ci	if (err && !signal_pending(current))
56562306a36Sopenharmony_ci		drbd_warn(connection, "short read (expected size %d)\n", (int)size);
56662306a36Sopenharmony_ci	return err;
56762306a36Sopenharmony_ci}
56862306a36Sopenharmony_ci
56962306a36Sopenharmony_ci/* quoting tcp(7):
57062306a36Sopenharmony_ci *   On individual connections, the socket buffer size must be set prior to the
57162306a36Sopenharmony_ci *   listen(2) or connect(2) calls in order to have it take effect.
57262306a36Sopenharmony_ci * This is our wrapper to do so.
57362306a36Sopenharmony_ci */
57462306a36Sopenharmony_cistatic void drbd_setbufsize(struct socket *sock, unsigned int snd,
57562306a36Sopenharmony_ci		unsigned int rcv)
57662306a36Sopenharmony_ci{
57762306a36Sopenharmony_ci	/* open coded SO_SNDBUF, SO_RCVBUF */
57862306a36Sopenharmony_ci	if (snd) {
57962306a36Sopenharmony_ci		sock->sk->sk_sndbuf = snd;
58062306a36Sopenharmony_ci		sock->sk->sk_userlocks |= SOCK_SNDBUF_LOCK;
58162306a36Sopenharmony_ci	}
58262306a36Sopenharmony_ci	if (rcv) {
58362306a36Sopenharmony_ci		sock->sk->sk_rcvbuf = rcv;
58462306a36Sopenharmony_ci		sock->sk->sk_userlocks |= SOCK_RCVBUF_LOCK;
58562306a36Sopenharmony_ci	}
58662306a36Sopenharmony_ci}
58762306a36Sopenharmony_ci
/*
 * drbd_try_connect() - make one attempt at an outgoing TCP connection
 * @connection:	connection whose my_addr / peer_addr and net_conf are used
 *
 * Creates a TCP socket, binds it to the configured local address (with the
 * port forced to 0) and connects it to the configured peer address.  Send and
 * receive timeouts of the socket are set to connect_int seconds.
 *
 * Returns the connected socket, or NULL if there is no net_conf or any step
 * failed.  A failure in socket creation or bind that is not one of the
 * "transient" errnos listed below requests C_DISCONNECTING; a failure of
 * connect() itself never does, it is at most logged.
 */
static struct socket *drbd_try_connect(struct drbd_connection *connection)
{
	const char *what;
	struct socket *sock;
	struct sockaddr_in6 src_in6;
	struct sockaddr_in6 peer_in6;
	struct net_conf *nc;
	int err, peer_addr_len, my_addr_len;
	int sndbuf_size, rcvbuf_size, connect_int;
	int disconnect_on_error = 1;

	/* Snapshot the settings we need; nc must not be used after unlock. */
	rcu_read_lock();
	nc = rcu_dereference(connection->net_conf);
	if (!nc) {
		rcu_read_unlock();
		return NULL;
	}
	sndbuf_size = nc->sndbuf_size;
	rcvbuf_size = nc->rcvbuf_size;
	connect_int = nc->connect_int;
	rcu_read_unlock();

	/* Work on local copies, clamped to the size of the local buffers. */
	my_addr_len = min_t(int, connection->my_addr_len, sizeof(src_in6));
	memcpy(&src_in6, &connection->my_addr, my_addr_len);

	if (((struct sockaddr *)&connection->my_addr)->sa_family == AF_INET6)
		src_in6.sin6_port = 0;
	else
		((struct sockaddr_in *)&src_in6)->sin_port = 0; /* AF_INET & AF_SCI */

	/* Clamp against the buffer we actually copy into. */
	peer_addr_len = min_t(int, connection->peer_addr_len, sizeof(peer_in6));
	memcpy(&peer_in6, &connection->peer_addr, peer_addr_len);

	what = "sock_create_kern";
	err = sock_create_kern(&init_net, ((struct sockaddr *)&src_in6)->sa_family,
			       SOCK_STREAM, IPPROTO_TCP, &sock);
	if (err < 0) {
		sock = NULL;
		goto out;
	}

	sock->sk->sk_rcvtimeo =
	sock->sk->sk_sndtimeo = connect_int * HZ;
	drbd_setbufsize(sock, sndbuf_size, rcvbuf_size);

	/* explicitly bind to the configured IP as source IP
	 *  for the outgoing connections.
	 *  This is needed for multihomed hosts and to be
	 *  able to use lo: interfaces for drbd.
	 * Make sure to use 0 as port number, so linux selects
	 *  a free one dynamically.
	 */
	what = "bind before connect";
	err = sock->ops->bind(sock, (struct sockaddr *) &src_in6, my_addr_len);
	if (err < 0)
		goto out;

	/* connect may fail, peer not yet available.
	 * stay C_WF_CONNECTION, don't go Disconnecting! */
	disconnect_on_error = 0;
	what = "connect";
	err = sock->ops->connect(sock, (struct sockaddr *) &peer_in6, peer_addr_len, 0);

out:
	if (err < 0) {
		if (sock) {
			sock_release(sock);
			sock = NULL;
		}
		switch (-err) {
			/* timeout, busy, signal pending */
		case ETIMEDOUT: case EAGAIN: case EINPROGRESS:
		case EINTR: case ERESTARTSYS:
			/* peer not (yet) available, network problem */
		case ECONNREFUSED: case ENETUNREACH:
		case EHOSTDOWN:    case EHOSTUNREACH:
			/* transient: neither log nor disconnect */
			disconnect_on_error = 0;
			break;
		default:
			drbd_err(connection, "%s failed, err = %d\n", what, err);
		}
		if (disconnect_on_error)
			conn_request_state(connection, NS(conn, C_DISCONNECTING), CS_HARD);
	}

	return sock;
}
67562306a36Sopenharmony_ci
67662306a36Sopenharmony_cistruct accept_wait_data {
67762306a36Sopenharmony_ci	struct drbd_connection *connection;
67862306a36Sopenharmony_ci	struct socket *s_listen;
67962306a36Sopenharmony_ci	struct completion door_bell;
68062306a36Sopenharmony_ci	void (*original_sk_state_change)(struct sock *sk);
68162306a36Sopenharmony_ci
68262306a36Sopenharmony_ci};
68362306a36Sopenharmony_ci
68462306a36Sopenharmony_cistatic void drbd_incoming_connection(struct sock *sk)
68562306a36Sopenharmony_ci{
68662306a36Sopenharmony_ci	struct accept_wait_data *ad = sk->sk_user_data;
68762306a36Sopenharmony_ci	void (*state_change)(struct sock *sk);
68862306a36Sopenharmony_ci
68962306a36Sopenharmony_ci	state_change = ad->original_sk_state_change;
69062306a36Sopenharmony_ci	if (sk->sk_state == TCP_ESTABLISHED)
69162306a36Sopenharmony_ci		complete(&ad->door_bell);
69262306a36Sopenharmony_ci	state_change(sk);
69362306a36Sopenharmony_ci}
69462306a36Sopenharmony_ci
69562306a36Sopenharmony_cistatic int prepare_listen_socket(struct drbd_connection *connection, struct accept_wait_data *ad)
69662306a36Sopenharmony_ci{
69762306a36Sopenharmony_ci	int err, sndbuf_size, rcvbuf_size, my_addr_len;
69862306a36Sopenharmony_ci	struct sockaddr_in6 my_addr;
69962306a36Sopenharmony_ci	struct socket *s_listen;
70062306a36Sopenharmony_ci	struct net_conf *nc;
70162306a36Sopenharmony_ci	const char *what;
70262306a36Sopenharmony_ci
70362306a36Sopenharmony_ci	rcu_read_lock();
70462306a36Sopenharmony_ci	nc = rcu_dereference(connection->net_conf);
70562306a36Sopenharmony_ci	if (!nc) {
70662306a36Sopenharmony_ci		rcu_read_unlock();
70762306a36Sopenharmony_ci		return -EIO;
70862306a36Sopenharmony_ci	}
70962306a36Sopenharmony_ci	sndbuf_size = nc->sndbuf_size;
71062306a36Sopenharmony_ci	rcvbuf_size = nc->rcvbuf_size;
71162306a36Sopenharmony_ci	rcu_read_unlock();
71262306a36Sopenharmony_ci
71362306a36Sopenharmony_ci	my_addr_len = min_t(int, connection->my_addr_len, sizeof(struct sockaddr_in6));
71462306a36Sopenharmony_ci	memcpy(&my_addr, &connection->my_addr, my_addr_len);
71562306a36Sopenharmony_ci
71662306a36Sopenharmony_ci	what = "sock_create_kern";
71762306a36Sopenharmony_ci	err = sock_create_kern(&init_net, ((struct sockaddr *)&my_addr)->sa_family,
71862306a36Sopenharmony_ci			       SOCK_STREAM, IPPROTO_TCP, &s_listen);
71962306a36Sopenharmony_ci	if (err) {
72062306a36Sopenharmony_ci		s_listen = NULL;
72162306a36Sopenharmony_ci		goto out;
72262306a36Sopenharmony_ci	}
72362306a36Sopenharmony_ci
72462306a36Sopenharmony_ci	s_listen->sk->sk_reuse = SK_CAN_REUSE; /* SO_REUSEADDR */
72562306a36Sopenharmony_ci	drbd_setbufsize(s_listen, sndbuf_size, rcvbuf_size);
72662306a36Sopenharmony_ci
72762306a36Sopenharmony_ci	what = "bind before listen";
72862306a36Sopenharmony_ci	err = s_listen->ops->bind(s_listen, (struct sockaddr *)&my_addr, my_addr_len);
72962306a36Sopenharmony_ci	if (err < 0)
73062306a36Sopenharmony_ci		goto out;
73162306a36Sopenharmony_ci
73262306a36Sopenharmony_ci	ad->s_listen = s_listen;
73362306a36Sopenharmony_ci	write_lock_bh(&s_listen->sk->sk_callback_lock);
73462306a36Sopenharmony_ci	ad->original_sk_state_change = s_listen->sk->sk_state_change;
73562306a36Sopenharmony_ci	s_listen->sk->sk_state_change = drbd_incoming_connection;
73662306a36Sopenharmony_ci	s_listen->sk->sk_user_data = ad;
73762306a36Sopenharmony_ci	write_unlock_bh(&s_listen->sk->sk_callback_lock);
73862306a36Sopenharmony_ci
73962306a36Sopenharmony_ci	what = "listen";
74062306a36Sopenharmony_ci	err = s_listen->ops->listen(s_listen, 5);
74162306a36Sopenharmony_ci	if (err < 0)
74262306a36Sopenharmony_ci		goto out;
74362306a36Sopenharmony_ci
74462306a36Sopenharmony_ci	return 0;
74562306a36Sopenharmony_ciout:
74662306a36Sopenharmony_ci	if (s_listen)
74762306a36Sopenharmony_ci		sock_release(s_listen);
74862306a36Sopenharmony_ci	if (err < 0) {
74962306a36Sopenharmony_ci		if (err != -EAGAIN && err != -EINTR && err != -ERESTARTSYS) {
75062306a36Sopenharmony_ci			drbd_err(connection, "%s failed, err = %d\n", what, err);
75162306a36Sopenharmony_ci			conn_request_state(connection, NS(conn, C_DISCONNECTING), CS_HARD);
75262306a36Sopenharmony_ci		}
75362306a36Sopenharmony_ci	}
75462306a36Sopenharmony_ci
75562306a36Sopenharmony_ci	return -EIO;
75662306a36Sopenharmony_ci}
75762306a36Sopenharmony_ci
75862306a36Sopenharmony_cistatic void unregister_state_change(struct sock *sk, struct accept_wait_data *ad)
75962306a36Sopenharmony_ci{
76062306a36Sopenharmony_ci	write_lock_bh(&sk->sk_callback_lock);
76162306a36Sopenharmony_ci	sk->sk_state_change = ad->original_sk_state_change;
76262306a36Sopenharmony_ci	sk->sk_user_data = NULL;
76362306a36Sopenharmony_ci	write_unlock_bh(&sk->sk_callback_lock);
76462306a36Sopenharmony_ci}
76562306a36Sopenharmony_ci
76662306a36Sopenharmony_cistatic struct socket *drbd_wait_for_connect(struct drbd_connection *connection, struct accept_wait_data *ad)
76762306a36Sopenharmony_ci{
76862306a36Sopenharmony_ci	int timeo, connect_int, err = 0;
76962306a36Sopenharmony_ci	struct socket *s_estab = NULL;
77062306a36Sopenharmony_ci	struct net_conf *nc;
77162306a36Sopenharmony_ci
77262306a36Sopenharmony_ci	rcu_read_lock();
77362306a36Sopenharmony_ci	nc = rcu_dereference(connection->net_conf);
77462306a36Sopenharmony_ci	if (!nc) {
77562306a36Sopenharmony_ci		rcu_read_unlock();
77662306a36Sopenharmony_ci		return NULL;
77762306a36Sopenharmony_ci	}
77862306a36Sopenharmony_ci	connect_int = nc->connect_int;
77962306a36Sopenharmony_ci	rcu_read_unlock();
78062306a36Sopenharmony_ci
78162306a36Sopenharmony_ci	timeo = connect_int * HZ;
78262306a36Sopenharmony_ci	/* 28.5% random jitter */
78362306a36Sopenharmony_ci	timeo += get_random_u32_below(2) ? timeo / 7 : -timeo / 7;
78462306a36Sopenharmony_ci
78562306a36Sopenharmony_ci	err = wait_for_completion_interruptible_timeout(&ad->door_bell, timeo);
78662306a36Sopenharmony_ci	if (err <= 0)
78762306a36Sopenharmony_ci		return NULL;
78862306a36Sopenharmony_ci
78962306a36Sopenharmony_ci	err = kernel_accept(ad->s_listen, &s_estab, 0);
79062306a36Sopenharmony_ci	if (err < 0) {
79162306a36Sopenharmony_ci		if (err != -EAGAIN && err != -EINTR && err != -ERESTARTSYS) {
79262306a36Sopenharmony_ci			drbd_err(connection, "accept failed, err = %d\n", err);
79362306a36Sopenharmony_ci			conn_request_state(connection, NS(conn, C_DISCONNECTING), CS_HARD);
79462306a36Sopenharmony_ci		}
79562306a36Sopenharmony_ci	}
79662306a36Sopenharmony_ci
79762306a36Sopenharmony_ci	if (s_estab)
79862306a36Sopenharmony_ci		unregister_state_change(s_estab->sk, ad);
79962306a36Sopenharmony_ci
80062306a36Sopenharmony_ci	return s_estab;
80162306a36Sopenharmony_ci}
80262306a36Sopenharmony_ci
80362306a36Sopenharmony_cistatic int decode_header(struct drbd_connection *, void *, struct packet_info *);
80462306a36Sopenharmony_ci
80562306a36Sopenharmony_cistatic int send_first_packet(struct drbd_connection *connection, struct drbd_socket *sock,
80662306a36Sopenharmony_ci			     enum drbd_packet cmd)
80762306a36Sopenharmony_ci{
80862306a36Sopenharmony_ci	if (!conn_prepare_command(connection, sock))
80962306a36Sopenharmony_ci		return -EIO;
81062306a36Sopenharmony_ci	return conn_send_command(connection, sock, cmd, 0, NULL, 0);
81162306a36Sopenharmony_ci}
81262306a36Sopenharmony_ci
81362306a36Sopenharmony_cistatic int receive_first_packet(struct drbd_connection *connection, struct socket *sock)
81462306a36Sopenharmony_ci{
81562306a36Sopenharmony_ci	unsigned int header_size = drbd_header_size(connection);
81662306a36Sopenharmony_ci	struct packet_info pi;
81762306a36Sopenharmony_ci	struct net_conf *nc;
81862306a36Sopenharmony_ci	int err;
81962306a36Sopenharmony_ci
82062306a36Sopenharmony_ci	rcu_read_lock();
82162306a36Sopenharmony_ci	nc = rcu_dereference(connection->net_conf);
82262306a36Sopenharmony_ci	if (!nc) {
82362306a36Sopenharmony_ci		rcu_read_unlock();
82462306a36Sopenharmony_ci		return -EIO;
82562306a36Sopenharmony_ci	}
82662306a36Sopenharmony_ci	sock->sk->sk_rcvtimeo = nc->ping_timeo * 4 * HZ / 10;
82762306a36Sopenharmony_ci	rcu_read_unlock();
82862306a36Sopenharmony_ci
82962306a36Sopenharmony_ci	err = drbd_recv_short(sock, connection->data.rbuf, header_size, 0);
83062306a36Sopenharmony_ci	if (err != header_size) {
83162306a36Sopenharmony_ci		if (err >= 0)
83262306a36Sopenharmony_ci			err = -EIO;
83362306a36Sopenharmony_ci		return err;
83462306a36Sopenharmony_ci	}
83562306a36Sopenharmony_ci	err = decode_header(connection, connection->data.rbuf, &pi);
83662306a36Sopenharmony_ci	if (err)
83762306a36Sopenharmony_ci		return err;
83862306a36Sopenharmony_ci	return pi.cmd;
83962306a36Sopenharmony_ci}
84062306a36Sopenharmony_ci
84162306a36Sopenharmony_ci/**
84262306a36Sopenharmony_ci * drbd_socket_okay() - Free the socket if its connection is not okay
84362306a36Sopenharmony_ci * @sock:	pointer to the pointer to the socket.
84462306a36Sopenharmony_ci */
84562306a36Sopenharmony_cistatic bool drbd_socket_okay(struct socket **sock)
84662306a36Sopenharmony_ci{
84762306a36Sopenharmony_ci	int rr;
84862306a36Sopenharmony_ci	char tb[4];
84962306a36Sopenharmony_ci
85062306a36Sopenharmony_ci	if (!*sock)
85162306a36Sopenharmony_ci		return false;
85262306a36Sopenharmony_ci
85362306a36Sopenharmony_ci	rr = drbd_recv_short(*sock, tb, 4, MSG_DONTWAIT | MSG_PEEK);
85462306a36Sopenharmony_ci
85562306a36Sopenharmony_ci	if (rr > 0 || rr == -EAGAIN) {
85662306a36Sopenharmony_ci		return true;
85762306a36Sopenharmony_ci	} else {
85862306a36Sopenharmony_ci		sock_release(*sock);
85962306a36Sopenharmony_ci		*sock = NULL;
86062306a36Sopenharmony_ci		return false;
86162306a36Sopenharmony_ci	}
86262306a36Sopenharmony_ci}
86362306a36Sopenharmony_ci
86462306a36Sopenharmony_cistatic bool connection_established(struct drbd_connection *connection,
86562306a36Sopenharmony_ci				   struct socket **sock1,
86662306a36Sopenharmony_ci				   struct socket **sock2)
86762306a36Sopenharmony_ci{
86862306a36Sopenharmony_ci	struct net_conf *nc;
86962306a36Sopenharmony_ci	int timeout;
87062306a36Sopenharmony_ci	bool ok;
87162306a36Sopenharmony_ci
87262306a36Sopenharmony_ci	if (!*sock1 || !*sock2)
87362306a36Sopenharmony_ci		return false;
87462306a36Sopenharmony_ci
87562306a36Sopenharmony_ci	rcu_read_lock();
87662306a36Sopenharmony_ci	nc = rcu_dereference(connection->net_conf);
87762306a36Sopenharmony_ci	timeout = (nc->sock_check_timeo ?: nc->ping_timeo) * HZ / 10;
87862306a36Sopenharmony_ci	rcu_read_unlock();
87962306a36Sopenharmony_ci	schedule_timeout_interruptible(timeout);
88062306a36Sopenharmony_ci
88162306a36Sopenharmony_ci	ok = drbd_socket_okay(sock1);
88262306a36Sopenharmony_ci	ok = drbd_socket_okay(sock2) && ok;
88362306a36Sopenharmony_ci
88462306a36Sopenharmony_ci	return ok;
88562306a36Sopenharmony_ci}
88662306a36Sopenharmony_ci
88762306a36Sopenharmony_ci/* Gets called if a connection is established, or if a new minor gets created
88862306a36Sopenharmony_ci   in a connection */
88962306a36Sopenharmony_ciint drbd_connected(struct drbd_peer_device *peer_device)
89062306a36Sopenharmony_ci{
89162306a36Sopenharmony_ci	struct drbd_device *device = peer_device->device;
89262306a36Sopenharmony_ci	int err;
89362306a36Sopenharmony_ci
89462306a36Sopenharmony_ci	atomic_set(&device->packet_seq, 0);
89562306a36Sopenharmony_ci	device->peer_seq = 0;
89662306a36Sopenharmony_ci
89762306a36Sopenharmony_ci	device->state_mutex = peer_device->connection->agreed_pro_version < 100 ?
89862306a36Sopenharmony_ci		&peer_device->connection->cstate_mutex :
89962306a36Sopenharmony_ci		&device->own_state_mutex;
90062306a36Sopenharmony_ci
90162306a36Sopenharmony_ci	err = drbd_send_sync_param(peer_device);
90262306a36Sopenharmony_ci	if (!err)
90362306a36Sopenharmony_ci		err = drbd_send_sizes(peer_device, 0, 0);
90462306a36Sopenharmony_ci	if (!err)
90562306a36Sopenharmony_ci		err = drbd_send_uuids(peer_device);
90662306a36Sopenharmony_ci	if (!err)
90762306a36Sopenharmony_ci		err = drbd_send_current_state(peer_device);
90862306a36Sopenharmony_ci	clear_bit(USE_DEGR_WFC_T, &device->flags);
90962306a36Sopenharmony_ci	clear_bit(RESIZE_PENDING, &device->flags);
91062306a36Sopenharmony_ci	atomic_set(&device->ap_in_flight, 0);
91162306a36Sopenharmony_ci	mod_timer(&device->request_timer, jiffies + HZ); /* just start it here. */
91262306a36Sopenharmony_ci	return err;
91362306a36Sopenharmony_ci}
91462306a36Sopenharmony_ci
91562306a36Sopenharmony_ci/*
91662306a36Sopenharmony_ci * return values:
91762306a36Sopenharmony_ci *   1 yes, we have a valid connection
91862306a36Sopenharmony_ci *   0 oops, did not work out, please try again
91962306a36Sopenharmony_ci *  -1 peer talks different language,
92062306a36Sopenharmony_ci *     no point in trying again, please go standalone.
92162306a36Sopenharmony_ci *  -2 We do not have a network config...
92262306a36Sopenharmony_ci */
92362306a36Sopenharmony_cistatic int conn_connect(struct drbd_connection *connection)
92462306a36Sopenharmony_ci{
92562306a36Sopenharmony_ci	struct drbd_socket sock, msock;
92662306a36Sopenharmony_ci	struct drbd_peer_device *peer_device;
92762306a36Sopenharmony_ci	struct net_conf *nc;
92862306a36Sopenharmony_ci	int vnr, timeout, h;
92962306a36Sopenharmony_ci	bool discard_my_data, ok;
93062306a36Sopenharmony_ci	enum drbd_state_rv rv;
93162306a36Sopenharmony_ci	struct accept_wait_data ad = {
93262306a36Sopenharmony_ci		.connection = connection,
93362306a36Sopenharmony_ci		.door_bell = COMPLETION_INITIALIZER_ONSTACK(ad.door_bell),
93462306a36Sopenharmony_ci	};
93562306a36Sopenharmony_ci
93662306a36Sopenharmony_ci	clear_bit(DISCONNECT_SENT, &connection->flags);
93762306a36Sopenharmony_ci	if (conn_request_state(connection, NS(conn, C_WF_CONNECTION), CS_VERBOSE) < SS_SUCCESS)
93862306a36Sopenharmony_ci		return -2;
93962306a36Sopenharmony_ci
94062306a36Sopenharmony_ci	mutex_init(&sock.mutex);
94162306a36Sopenharmony_ci	sock.sbuf = connection->data.sbuf;
94262306a36Sopenharmony_ci	sock.rbuf = connection->data.rbuf;
94362306a36Sopenharmony_ci	sock.socket = NULL;
94462306a36Sopenharmony_ci	mutex_init(&msock.mutex);
94562306a36Sopenharmony_ci	msock.sbuf = connection->meta.sbuf;
94662306a36Sopenharmony_ci	msock.rbuf = connection->meta.rbuf;
94762306a36Sopenharmony_ci	msock.socket = NULL;
94862306a36Sopenharmony_ci
94962306a36Sopenharmony_ci	/* Assume that the peer only understands protocol 80 until we know better.  */
95062306a36Sopenharmony_ci	connection->agreed_pro_version = 80;
95162306a36Sopenharmony_ci
95262306a36Sopenharmony_ci	if (prepare_listen_socket(connection, &ad))
95362306a36Sopenharmony_ci		return 0;
95462306a36Sopenharmony_ci
95562306a36Sopenharmony_ci	do {
95662306a36Sopenharmony_ci		struct socket *s;
95762306a36Sopenharmony_ci
95862306a36Sopenharmony_ci		s = drbd_try_connect(connection);
95962306a36Sopenharmony_ci		if (s) {
96062306a36Sopenharmony_ci			if (!sock.socket) {
96162306a36Sopenharmony_ci				sock.socket = s;
96262306a36Sopenharmony_ci				send_first_packet(connection, &sock, P_INITIAL_DATA);
96362306a36Sopenharmony_ci			} else if (!msock.socket) {
96462306a36Sopenharmony_ci				clear_bit(RESOLVE_CONFLICTS, &connection->flags);
96562306a36Sopenharmony_ci				msock.socket = s;
96662306a36Sopenharmony_ci				send_first_packet(connection, &msock, P_INITIAL_META);
96762306a36Sopenharmony_ci			} else {
96862306a36Sopenharmony_ci				drbd_err(connection, "Logic error in conn_connect()\n");
96962306a36Sopenharmony_ci				goto out_release_sockets;
97062306a36Sopenharmony_ci			}
97162306a36Sopenharmony_ci		}
97262306a36Sopenharmony_ci
97362306a36Sopenharmony_ci		if (connection_established(connection, &sock.socket, &msock.socket))
97462306a36Sopenharmony_ci			break;
97562306a36Sopenharmony_ci
97662306a36Sopenharmony_ciretry:
97762306a36Sopenharmony_ci		s = drbd_wait_for_connect(connection, &ad);
97862306a36Sopenharmony_ci		if (s) {
97962306a36Sopenharmony_ci			int fp = receive_first_packet(connection, s);
98062306a36Sopenharmony_ci			drbd_socket_okay(&sock.socket);
98162306a36Sopenharmony_ci			drbd_socket_okay(&msock.socket);
98262306a36Sopenharmony_ci			switch (fp) {
98362306a36Sopenharmony_ci			case P_INITIAL_DATA:
98462306a36Sopenharmony_ci				if (sock.socket) {
98562306a36Sopenharmony_ci					drbd_warn(connection, "initial packet S crossed\n");
98662306a36Sopenharmony_ci					sock_release(sock.socket);
98762306a36Sopenharmony_ci					sock.socket = s;
98862306a36Sopenharmony_ci					goto randomize;
98962306a36Sopenharmony_ci				}
99062306a36Sopenharmony_ci				sock.socket = s;
99162306a36Sopenharmony_ci				break;
99262306a36Sopenharmony_ci			case P_INITIAL_META:
99362306a36Sopenharmony_ci				set_bit(RESOLVE_CONFLICTS, &connection->flags);
99462306a36Sopenharmony_ci				if (msock.socket) {
99562306a36Sopenharmony_ci					drbd_warn(connection, "initial packet M crossed\n");
99662306a36Sopenharmony_ci					sock_release(msock.socket);
99762306a36Sopenharmony_ci					msock.socket = s;
99862306a36Sopenharmony_ci					goto randomize;
99962306a36Sopenharmony_ci				}
100062306a36Sopenharmony_ci				msock.socket = s;
100162306a36Sopenharmony_ci				break;
100262306a36Sopenharmony_ci			default:
100362306a36Sopenharmony_ci				drbd_warn(connection, "Error receiving initial packet\n");
100462306a36Sopenharmony_ci				sock_release(s);
100562306a36Sopenharmony_cirandomize:
100662306a36Sopenharmony_ci				if (get_random_u32_below(2))
100762306a36Sopenharmony_ci					goto retry;
100862306a36Sopenharmony_ci			}
100962306a36Sopenharmony_ci		}
101062306a36Sopenharmony_ci
101162306a36Sopenharmony_ci		if (connection->cstate <= C_DISCONNECTING)
101262306a36Sopenharmony_ci			goto out_release_sockets;
101362306a36Sopenharmony_ci		if (signal_pending(current)) {
101462306a36Sopenharmony_ci			flush_signals(current);
101562306a36Sopenharmony_ci			smp_rmb();
101662306a36Sopenharmony_ci			if (get_t_state(&connection->receiver) == EXITING)
101762306a36Sopenharmony_ci				goto out_release_sockets;
101862306a36Sopenharmony_ci		}
101962306a36Sopenharmony_ci
102062306a36Sopenharmony_ci		ok = connection_established(connection, &sock.socket, &msock.socket);
102162306a36Sopenharmony_ci	} while (!ok);
102262306a36Sopenharmony_ci
102362306a36Sopenharmony_ci	if (ad.s_listen)
102462306a36Sopenharmony_ci		sock_release(ad.s_listen);
102562306a36Sopenharmony_ci
102662306a36Sopenharmony_ci	sock.socket->sk->sk_reuse = SK_CAN_REUSE; /* SO_REUSEADDR */
102762306a36Sopenharmony_ci	msock.socket->sk->sk_reuse = SK_CAN_REUSE; /* SO_REUSEADDR */
102862306a36Sopenharmony_ci
102962306a36Sopenharmony_ci	sock.socket->sk->sk_allocation = GFP_NOIO;
103062306a36Sopenharmony_ci	msock.socket->sk->sk_allocation = GFP_NOIO;
103162306a36Sopenharmony_ci
103262306a36Sopenharmony_ci	sock.socket->sk->sk_use_task_frag = false;
103362306a36Sopenharmony_ci	msock.socket->sk->sk_use_task_frag = false;
103462306a36Sopenharmony_ci
103562306a36Sopenharmony_ci	sock.socket->sk->sk_priority = TC_PRIO_INTERACTIVE_BULK;
103662306a36Sopenharmony_ci	msock.socket->sk->sk_priority = TC_PRIO_INTERACTIVE;
103762306a36Sopenharmony_ci
103862306a36Sopenharmony_ci	/* NOT YET ...
103962306a36Sopenharmony_ci	 * sock.socket->sk->sk_sndtimeo = connection->net_conf->timeout*HZ/10;
104062306a36Sopenharmony_ci	 * sock.socket->sk->sk_rcvtimeo = MAX_SCHEDULE_TIMEOUT;
104162306a36Sopenharmony_ci	 * first set it to the P_CONNECTION_FEATURES timeout,
104262306a36Sopenharmony_ci	 * which we set to 4x the configured ping_timeout. */
104362306a36Sopenharmony_ci	rcu_read_lock();
104462306a36Sopenharmony_ci	nc = rcu_dereference(connection->net_conf);
104562306a36Sopenharmony_ci
104662306a36Sopenharmony_ci	sock.socket->sk->sk_sndtimeo =
104762306a36Sopenharmony_ci	sock.socket->sk->sk_rcvtimeo = nc->ping_timeo*4*HZ/10;
104862306a36Sopenharmony_ci
104962306a36Sopenharmony_ci	msock.socket->sk->sk_rcvtimeo = nc->ping_int*HZ;
105062306a36Sopenharmony_ci	timeout = nc->timeout * HZ / 10;
105162306a36Sopenharmony_ci	discard_my_data = nc->discard_my_data;
105262306a36Sopenharmony_ci	rcu_read_unlock();
105362306a36Sopenharmony_ci
105462306a36Sopenharmony_ci	msock.socket->sk->sk_sndtimeo = timeout;
105562306a36Sopenharmony_ci
105662306a36Sopenharmony_ci	/* we don't want delays.
105762306a36Sopenharmony_ci	 * we use TCP_CORK where appropriate, though */
105862306a36Sopenharmony_ci	tcp_sock_set_nodelay(sock.socket->sk);
105962306a36Sopenharmony_ci	tcp_sock_set_nodelay(msock.socket->sk);
106062306a36Sopenharmony_ci
106162306a36Sopenharmony_ci	connection->data.socket = sock.socket;
106262306a36Sopenharmony_ci	connection->meta.socket = msock.socket;
106362306a36Sopenharmony_ci	connection->last_received = jiffies;
106462306a36Sopenharmony_ci
106562306a36Sopenharmony_ci	h = drbd_do_features(connection);
106662306a36Sopenharmony_ci	if (h <= 0)
106762306a36Sopenharmony_ci		return h;
106862306a36Sopenharmony_ci
106962306a36Sopenharmony_ci	if (connection->cram_hmac_tfm) {
107062306a36Sopenharmony_ci		/* drbd_request_state(device, NS(conn, WFAuth)); */
107162306a36Sopenharmony_ci		switch (drbd_do_auth(connection)) {
107262306a36Sopenharmony_ci		case -1:
107362306a36Sopenharmony_ci			drbd_err(connection, "Authentication of peer failed\n");
107462306a36Sopenharmony_ci			return -1;
107562306a36Sopenharmony_ci		case 0:
107662306a36Sopenharmony_ci			drbd_err(connection, "Authentication of peer failed, trying again.\n");
107762306a36Sopenharmony_ci			return 0;
107862306a36Sopenharmony_ci		}
107962306a36Sopenharmony_ci	}
108062306a36Sopenharmony_ci
108162306a36Sopenharmony_ci	connection->data.socket->sk->sk_sndtimeo = timeout;
108262306a36Sopenharmony_ci	connection->data.socket->sk->sk_rcvtimeo = MAX_SCHEDULE_TIMEOUT;
108362306a36Sopenharmony_ci
108462306a36Sopenharmony_ci	if (drbd_send_protocol(connection) == -EOPNOTSUPP)
108562306a36Sopenharmony_ci		return -1;
108662306a36Sopenharmony_ci
108762306a36Sopenharmony_ci	/* Prevent a race between resync-handshake and
108862306a36Sopenharmony_ci	 * being promoted to Primary.
108962306a36Sopenharmony_ci	 *
109062306a36Sopenharmony_ci	 * Grab and release the state mutex, so we know that any current
109162306a36Sopenharmony_ci	 * drbd_set_role() is finished, and any incoming drbd_set_role
109262306a36Sopenharmony_ci	 * will see the STATE_SENT flag, and wait for it to be cleared.
109362306a36Sopenharmony_ci	 */
109462306a36Sopenharmony_ci	idr_for_each_entry(&connection->peer_devices, peer_device, vnr)
109562306a36Sopenharmony_ci		mutex_lock(peer_device->device->state_mutex);
109662306a36Sopenharmony_ci
109762306a36Sopenharmony_ci	/* avoid a race with conn_request_state( C_DISCONNECTING ) */
109862306a36Sopenharmony_ci	spin_lock_irq(&connection->resource->req_lock);
109962306a36Sopenharmony_ci	set_bit(STATE_SENT, &connection->flags);
110062306a36Sopenharmony_ci	spin_unlock_irq(&connection->resource->req_lock);
110162306a36Sopenharmony_ci
110262306a36Sopenharmony_ci	idr_for_each_entry(&connection->peer_devices, peer_device, vnr)
110362306a36Sopenharmony_ci		mutex_unlock(peer_device->device->state_mutex);
110462306a36Sopenharmony_ci
110562306a36Sopenharmony_ci	rcu_read_lock();
110662306a36Sopenharmony_ci	idr_for_each_entry(&connection->peer_devices, peer_device, vnr) {
110762306a36Sopenharmony_ci		struct drbd_device *device = peer_device->device;
110862306a36Sopenharmony_ci		kref_get(&device->kref);
110962306a36Sopenharmony_ci		rcu_read_unlock();
111062306a36Sopenharmony_ci
111162306a36Sopenharmony_ci		if (discard_my_data)
111262306a36Sopenharmony_ci			set_bit(DISCARD_MY_DATA, &device->flags);
111362306a36Sopenharmony_ci		else
111462306a36Sopenharmony_ci			clear_bit(DISCARD_MY_DATA, &device->flags);
111562306a36Sopenharmony_ci
111662306a36Sopenharmony_ci		drbd_connected(peer_device);
111762306a36Sopenharmony_ci		kref_put(&device->kref, drbd_destroy_device);
111862306a36Sopenharmony_ci		rcu_read_lock();
111962306a36Sopenharmony_ci	}
112062306a36Sopenharmony_ci	rcu_read_unlock();
112162306a36Sopenharmony_ci
112262306a36Sopenharmony_ci	rv = conn_request_state(connection, NS(conn, C_WF_REPORT_PARAMS), CS_VERBOSE);
112362306a36Sopenharmony_ci	if (rv < SS_SUCCESS || connection->cstate != C_WF_REPORT_PARAMS) {
112462306a36Sopenharmony_ci		clear_bit(STATE_SENT, &connection->flags);
112562306a36Sopenharmony_ci		return 0;
112662306a36Sopenharmony_ci	}
112762306a36Sopenharmony_ci
112862306a36Sopenharmony_ci	drbd_thread_start(&connection->ack_receiver);
112962306a36Sopenharmony_ci	/* opencoded create_singlethread_workqueue(),
113062306a36Sopenharmony_ci	 * to be able to use format string arguments */
113162306a36Sopenharmony_ci	connection->ack_sender =
113262306a36Sopenharmony_ci		alloc_ordered_workqueue("drbd_as_%s", WQ_MEM_RECLAIM, connection->resource->name);
113362306a36Sopenharmony_ci	if (!connection->ack_sender) {
113462306a36Sopenharmony_ci		drbd_err(connection, "Failed to create workqueue ack_sender\n");
113562306a36Sopenharmony_ci		return 0;
113662306a36Sopenharmony_ci	}
113762306a36Sopenharmony_ci
113862306a36Sopenharmony_ci	mutex_lock(&connection->resource->conf_update);
113962306a36Sopenharmony_ci	/* The discard_my_data flag is a single-shot modifier to the next
114062306a36Sopenharmony_ci	 * connection attempt, the handshake of which is now well underway.
114162306a36Sopenharmony_ci	 * No need for rcu style copying of the whole struct
114262306a36Sopenharmony_ci	 * just to clear a single value. */
114362306a36Sopenharmony_ci	connection->net_conf->discard_my_data = 0;
114462306a36Sopenharmony_ci	mutex_unlock(&connection->resource->conf_update);
114562306a36Sopenharmony_ci
114662306a36Sopenharmony_ci	return h;
114762306a36Sopenharmony_ci
114862306a36Sopenharmony_ciout_release_sockets:
114962306a36Sopenharmony_ci	if (ad.s_listen)
115062306a36Sopenharmony_ci		sock_release(ad.s_listen);
115162306a36Sopenharmony_ci	if (sock.socket)
115262306a36Sopenharmony_ci		sock_release(sock.socket);
115362306a36Sopenharmony_ci	if (msock.socket)
115462306a36Sopenharmony_ci		sock_release(msock.socket);
115562306a36Sopenharmony_ci	return -1;
115662306a36Sopenharmony_ci}
115762306a36Sopenharmony_ci
115862306a36Sopenharmony_cistatic int decode_header(struct drbd_connection *connection, void *header, struct packet_info *pi)
115962306a36Sopenharmony_ci{
116062306a36Sopenharmony_ci	unsigned int header_size = drbd_header_size(connection);
116162306a36Sopenharmony_ci
116262306a36Sopenharmony_ci	if (header_size == sizeof(struct p_header100) &&
116362306a36Sopenharmony_ci	    *(__be32 *)header == cpu_to_be32(DRBD_MAGIC_100)) {
116462306a36Sopenharmony_ci		struct p_header100 *h = header;
116562306a36Sopenharmony_ci		if (h->pad != 0) {
116662306a36Sopenharmony_ci			drbd_err(connection, "Header padding is not zero\n");
116762306a36Sopenharmony_ci			return -EINVAL;
116862306a36Sopenharmony_ci		}
116962306a36Sopenharmony_ci		pi->vnr = be16_to_cpu(h->volume);
117062306a36Sopenharmony_ci		pi->cmd = be16_to_cpu(h->command);
117162306a36Sopenharmony_ci		pi->size = be32_to_cpu(h->length);
117262306a36Sopenharmony_ci	} else if (header_size == sizeof(struct p_header95) &&
117362306a36Sopenharmony_ci		   *(__be16 *)header == cpu_to_be16(DRBD_MAGIC_BIG)) {
117462306a36Sopenharmony_ci		struct p_header95 *h = header;
117562306a36Sopenharmony_ci		pi->cmd = be16_to_cpu(h->command);
117662306a36Sopenharmony_ci		pi->size = be32_to_cpu(h->length);
117762306a36Sopenharmony_ci		pi->vnr = 0;
117862306a36Sopenharmony_ci	} else if (header_size == sizeof(struct p_header80) &&
117962306a36Sopenharmony_ci		   *(__be32 *)header == cpu_to_be32(DRBD_MAGIC)) {
118062306a36Sopenharmony_ci		struct p_header80 *h = header;
118162306a36Sopenharmony_ci		pi->cmd = be16_to_cpu(h->command);
118262306a36Sopenharmony_ci		pi->size = be16_to_cpu(h->length);
118362306a36Sopenharmony_ci		pi->vnr = 0;
118462306a36Sopenharmony_ci	} else {
118562306a36Sopenharmony_ci		drbd_err(connection, "Wrong magic value 0x%08x in protocol version %d\n",
118662306a36Sopenharmony_ci			 be32_to_cpu(*(__be32 *)header),
118762306a36Sopenharmony_ci			 connection->agreed_pro_version);
118862306a36Sopenharmony_ci		return -EINVAL;
118962306a36Sopenharmony_ci	}
119062306a36Sopenharmony_ci	pi->data = header + header_size;
119162306a36Sopenharmony_ci	return 0;
119262306a36Sopenharmony_ci}
119362306a36Sopenharmony_ci
119462306a36Sopenharmony_cistatic void drbd_unplug_all_devices(struct drbd_connection *connection)
119562306a36Sopenharmony_ci{
119662306a36Sopenharmony_ci	if (current->plug == &connection->receiver_plug) {
119762306a36Sopenharmony_ci		blk_finish_plug(&connection->receiver_plug);
119862306a36Sopenharmony_ci		blk_start_plug(&connection->receiver_plug);
119962306a36Sopenharmony_ci	} /* else: maybe just schedule() ?? */
120062306a36Sopenharmony_ci}
120162306a36Sopenharmony_ci
120262306a36Sopenharmony_cistatic int drbd_recv_header(struct drbd_connection *connection, struct packet_info *pi)
120362306a36Sopenharmony_ci{
120462306a36Sopenharmony_ci	void *buffer = connection->data.rbuf;
120562306a36Sopenharmony_ci	int err;
120662306a36Sopenharmony_ci
120762306a36Sopenharmony_ci	err = drbd_recv_all_warn(connection, buffer, drbd_header_size(connection));
120862306a36Sopenharmony_ci	if (err)
120962306a36Sopenharmony_ci		return err;
121062306a36Sopenharmony_ci
121162306a36Sopenharmony_ci	err = decode_header(connection, buffer, pi);
121262306a36Sopenharmony_ci	connection->last_received = jiffies;
121362306a36Sopenharmony_ci
121462306a36Sopenharmony_ci	return err;
121562306a36Sopenharmony_ci}
121662306a36Sopenharmony_ci
121762306a36Sopenharmony_cistatic int drbd_recv_header_maybe_unplug(struct drbd_connection *connection, struct packet_info *pi)
121862306a36Sopenharmony_ci{
121962306a36Sopenharmony_ci	void *buffer = connection->data.rbuf;
122062306a36Sopenharmony_ci	unsigned int size = drbd_header_size(connection);
122162306a36Sopenharmony_ci	int err;
122262306a36Sopenharmony_ci
122362306a36Sopenharmony_ci	err = drbd_recv_short(connection->data.socket, buffer, size, MSG_NOSIGNAL|MSG_DONTWAIT);
122462306a36Sopenharmony_ci	if (err != size) {
122562306a36Sopenharmony_ci		/* If we have nothing in the receive buffer now, to reduce
122662306a36Sopenharmony_ci		 * application latency, try to drain the backend queues as
122762306a36Sopenharmony_ci		 * quickly as possible, and let remote TCP know what we have
122862306a36Sopenharmony_ci		 * received so far. */
122962306a36Sopenharmony_ci		if (err == -EAGAIN) {
123062306a36Sopenharmony_ci			tcp_sock_set_quickack(connection->data.socket->sk, 2);
123162306a36Sopenharmony_ci			drbd_unplug_all_devices(connection);
123262306a36Sopenharmony_ci		}
123362306a36Sopenharmony_ci		if (err > 0) {
123462306a36Sopenharmony_ci			buffer += err;
123562306a36Sopenharmony_ci			size -= err;
123662306a36Sopenharmony_ci		}
123762306a36Sopenharmony_ci		err = drbd_recv_all_warn(connection, buffer, size);
123862306a36Sopenharmony_ci		if (err)
123962306a36Sopenharmony_ci			return err;
124062306a36Sopenharmony_ci	}
124162306a36Sopenharmony_ci
124262306a36Sopenharmony_ci	err = decode_header(connection, connection->data.rbuf, pi);
124362306a36Sopenharmony_ci	connection->last_received = jiffies;
124462306a36Sopenharmony_ci
124562306a36Sopenharmony_ci	return err;
124662306a36Sopenharmony_ci}
/* This is blkdev_issue_flush, but asynchronous.
 * We want to submit to all component volumes in parallel,
 * then wait for all completions.
 *
 * One issue_flush_context lives on the stack of drbd_flush() and is shared
 * by all flush bios submitted from that call.
 */
struct issue_flush_context {
	/* Flush bios submitted but not yet completed, plus one reference
	 * held by drbd_flush() itself while it is still submitting. */
	atomic_t pending;
	/* 0, or the errno of a failed flush (-ENOMEM if a flush could not
	 * be issued at all).  Written without locking by whoever fails. */
	int error;
	/* Completed by the endio handler that drops pending to zero. */
	struct completion done;
};
/* Private data of a single flush bio: allocated in submit_one_flush(),
 * freed again in one_flush_endio(). */
struct one_flush_context {
	/* Device whose backing device is being flushed. */
	struct drbd_device *device;
	/* Shared context of the drbd_flush() call that issued this flush. */
	struct issue_flush_context *ctx;
};
126062306a36Sopenharmony_ci
126162306a36Sopenharmony_cistatic void one_flush_endio(struct bio *bio)
126262306a36Sopenharmony_ci{
126362306a36Sopenharmony_ci	struct one_flush_context *octx = bio->bi_private;
126462306a36Sopenharmony_ci	struct drbd_device *device = octx->device;
126562306a36Sopenharmony_ci	struct issue_flush_context *ctx = octx->ctx;
126662306a36Sopenharmony_ci
126762306a36Sopenharmony_ci	if (bio->bi_status) {
126862306a36Sopenharmony_ci		ctx->error = blk_status_to_errno(bio->bi_status);
126962306a36Sopenharmony_ci		drbd_info(device, "local disk FLUSH FAILED with status %d\n", bio->bi_status);
127062306a36Sopenharmony_ci	}
127162306a36Sopenharmony_ci	kfree(octx);
127262306a36Sopenharmony_ci	bio_put(bio);
127362306a36Sopenharmony_ci
127462306a36Sopenharmony_ci	clear_bit(FLUSH_PENDING, &device->flags);
127562306a36Sopenharmony_ci	put_ldev(device);
127662306a36Sopenharmony_ci	kref_put(&device->kref, drbd_destroy_device);
127762306a36Sopenharmony_ci
127862306a36Sopenharmony_ci	if (atomic_dec_and_test(&ctx->pending))
127962306a36Sopenharmony_ci		complete(&ctx->done);
128062306a36Sopenharmony_ci}
128162306a36Sopenharmony_ci
128262306a36Sopenharmony_cistatic void submit_one_flush(struct drbd_device *device, struct issue_flush_context *ctx)
128362306a36Sopenharmony_ci{
128462306a36Sopenharmony_ci	struct bio *bio = bio_alloc(device->ldev->backing_bdev, 0,
128562306a36Sopenharmony_ci				    REQ_OP_WRITE | REQ_PREFLUSH, GFP_NOIO);
128662306a36Sopenharmony_ci	struct one_flush_context *octx = kmalloc(sizeof(*octx), GFP_NOIO);
128762306a36Sopenharmony_ci
128862306a36Sopenharmony_ci	if (!octx) {
128962306a36Sopenharmony_ci		drbd_warn(device, "Could not allocate a octx, CANNOT ISSUE FLUSH\n");
129062306a36Sopenharmony_ci		/* FIXME: what else can I do now?  disconnecting or detaching
129162306a36Sopenharmony_ci		 * really does not help to improve the state of the world, either.
129262306a36Sopenharmony_ci		 */
129362306a36Sopenharmony_ci		bio_put(bio);
129462306a36Sopenharmony_ci
129562306a36Sopenharmony_ci		ctx->error = -ENOMEM;
129662306a36Sopenharmony_ci		put_ldev(device);
129762306a36Sopenharmony_ci		kref_put(&device->kref, drbd_destroy_device);
129862306a36Sopenharmony_ci		return;
129962306a36Sopenharmony_ci	}
130062306a36Sopenharmony_ci
130162306a36Sopenharmony_ci	octx->device = device;
130262306a36Sopenharmony_ci	octx->ctx = ctx;
130362306a36Sopenharmony_ci	bio->bi_private = octx;
130462306a36Sopenharmony_ci	bio->bi_end_io = one_flush_endio;
130562306a36Sopenharmony_ci
130662306a36Sopenharmony_ci	device->flush_jif = jiffies;
130762306a36Sopenharmony_ci	set_bit(FLUSH_PENDING, &device->flags);
130862306a36Sopenharmony_ci	atomic_inc(&ctx->pending);
130962306a36Sopenharmony_ci	submit_bio(bio);
131062306a36Sopenharmony_ci}
131162306a36Sopenharmony_ci
131262306a36Sopenharmony_cistatic void drbd_flush(struct drbd_connection *connection)
131362306a36Sopenharmony_ci{
131462306a36Sopenharmony_ci	if (connection->resource->write_ordering >= WO_BDEV_FLUSH) {
131562306a36Sopenharmony_ci		struct drbd_peer_device *peer_device;
131662306a36Sopenharmony_ci		struct issue_flush_context ctx;
131762306a36Sopenharmony_ci		int vnr;
131862306a36Sopenharmony_ci
131962306a36Sopenharmony_ci		atomic_set(&ctx.pending, 1);
132062306a36Sopenharmony_ci		ctx.error = 0;
132162306a36Sopenharmony_ci		init_completion(&ctx.done);
132262306a36Sopenharmony_ci
132362306a36Sopenharmony_ci		rcu_read_lock();
132462306a36Sopenharmony_ci		idr_for_each_entry(&connection->peer_devices, peer_device, vnr) {
132562306a36Sopenharmony_ci			struct drbd_device *device = peer_device->device;
132662306a36Sopenharmony_ci
132762306a36Sopenharmony_ci			if (!get_ldev(device))
132862306a36Sopenharmony_ci				continue;
132962306a36Sopenharmony_ci			kref_get(&device->kref);
133062306a36Sopenharmony_ci			rcu_read_unlock();
133162306a36Sopenharmony_ci
133262306a36Sopenharmony_ci			submit_one_flush(device, &ctx);
133362306a36Sopenharmony_ci
133462306a36Sopenharmony_ci			rcu_read_lock();
133562306a36Sopenharmony_ci		}
133662306a36Sopenharmony_ci		rcu_read_unlock();
133762306a36Sopenharmony_ci
133862306a36Sopenharmony_ci		/* Do we want to add a timeout,
133962306a36Sopenharmony_ci		 * if disk-timeout is set? */
134062306a36Sopenharmony_ci		if (!atomic_dec_and_test(&ctx.pending))
134162306a36Sopenharmony_ci			wait_for_completion(&ctx.done);
134262306a36Sopenharmony_ci
134362306a36Sopenharmony_ci		if (ctx.error) {
134462306a36Sopenharmony_ci			/* would rather check on EOPNOTSUPP, but that is not reliable.
134562306a36Sopenharmony_ci			 * don't try again for ANY return value != 0
134662306a36Sopenharmony_ci			 * if (rv == -EOPNOTSUPP) */
134762306a36Sopenharmony_ci			/* Any error is already reported by bio_endio callback. */
134862306a36Sopenharmony_ci			drbd_bump_write_ordering(connection->resource, NULL, WO_DRAIN_IO);
134962306a36Sopenharmony_ci		}
135062306a36Sopenharmony_ci	}
135162306a36Sopenharmony_ci}
135262306a36Sopenharmony_ci
135362306a36Sopenharmony_ci/**
135462306a36Sopenharmony_ci * drbd_may_finish_epoch() - Applies an epoch_event to the epoch's state, eventually finishes it.
135562306a36Sopenharmony_ci * @connection:	DRBD connection.
135662306a36Sopenharmony_ci * @epoch:	Epoch object.
135762306a36Sopenharmony_ci * @ev:		Epoch event.
135862306a36Sopenharmony_ci */
135962306a36Sopenharmony_cistatic enum finish_epoch drbd_may_finish_epoch(struct drbd_connection *connection,
136062306a36Sopenharmony_ci					       struct drbd_epoch *epoch,
136162306a36Sopenharmony_ci					       enum epoch_event ev)
136262306a36Sopenharmony_ci{
136362306a36Sopenharmony_ci	int epoch_size;
136462306a36Sopenharmony_ci	struct drbd_epoch *next_epoch;
136562306a36Sopenharmony_ci	enum finish_epoch rv = FE_STILL_LIVE;
136662306a36Sopenharmony_ci
136762306a36Sopenharmony_ci	spin_lock(&connection->epoch_lock);
136862306a36Sopenharmony_ci	do {
136962306a36Sopenharmony_ci		next_epoch = NULL;
137062306a36Sopenharmony_ci
137162306a36Sopenharmony_ci		epoch_size = atomic_read(&epoch->epoch_size);
137262306a36Sopenharmony_ci
137362306a36Sopenharmony_ci		switch (ev & ~EV_CLEANUP) {
137462306a36Sopenharmony_ci		case EV_PUT:
137562306a36Sopenharmony_ci			atomic_dec(&epoch->active);
137662306a36Sopenharmony_ci			break;
137762306a36Sopenharmony_ci		case EV_GOT_BARRIER_NR:
137862306a36Sopenharmony_ci			set_bit(DE_HAVE_BARRIER_NUMBER, &epoch->flags);
137962306a36Sopenharmony_ci			break;
138062306a36Sopenharmony_ci		case EV_BECAME_LAST:
138162306a36Sopenharmony_ci			/* nothing to do*/
138262306a36Sopenharmony_ci			break;
138362306a36Sopenharmony_ci		}
138462306a36Sopenharmony_ci
138562306a36Sopenharmony_ci		if (epoch_size != 0 &&
138662306a36Sopenharmony_ci		    atomic_read(&epoch->active) == 0 &&
138762306a36Sopenharmony_ci		    (test_bit(DE_HAVE_BARRIER_NUMBER, &epoch->flags) || ev & EV_CLEANUP)) {
138862306a36Sopenharmony_ci			if (!(ev & EV_CLEANUP)) {
138962306a36Sopenharmony_ci				spin_unlock(&connection->epoch_lock);
139062306a36Sopenharmony_ci				drbd_send_b_ack(epoch->connection, epoch->barrier_nr, epoch_size);
139162306a36Sopenharmony_ci				spin_lock(&connection->epoch_lock);
139262306a36Sopenharmony_ci			}
139362306a36Sopenharmony_ci#if 0
139462306a36Sopenharmony_ci			/* FIXME: dec unacked on connection, once we have
139562306a36Sopenharmony_ci			 * something to count pending connection packets in. */
139662306a36Sopenharmony_ci			if (test_bit(DE_HAVE_BARRIER_NUMBER, &epoch->flags))
139762306a36Sopenharmony_ci				dec_unacked(epoch->connection);
139862306a36Sopenharmony_ci#endif
139962306a36Sopenharmony_ci
140062306a36Sopenharmony_ci			if (connection->current_epoch != epoch) {
140162306a36Sopenharmony_ci				next_epoch = list_entry(epoch->list.next, struct drbd_epoch, list);
140262306a36Sopenharmony_ci				list_del(&epoch->list);
140362306a36Sopenharmony_ci				ev = EV_BECAME_LAST | (ev & EV_CLEANUP);
140462306a36Sopenharmony_ci				connection->epochs--;
140562306a36Sopenharmony_ci				kfree(epoch);
140662306a36Sopenharmony_ci
140762306a36Sopenharmony_ci				if (rv == FE_STILL_LIVE)
140862306a36Sopenharmony_ci					rv = FE_DESTROYED;
140962306a36Sopenharmony_ci			} else {
141062306a36Sopenharmony_ci				epoch->flags = 0;
141162306a36Sopenharmony_ci				atomic_set(&epoch->epoch_size, 0);
141262306a36Sopenharmony_ci				/* atomic_set(&epoch->active, 0); is already zero */
141362306a36Sopenharmony_ci				if (rv == FE_STILL_LIVE)
141462306a36Sopenharmony_ci					rv = FE_RECYCLED;
141562306a36Sopenharmony_ci			}
141662306a36Sopenharmony_ci		}
141762306a36Sopenharmony_ci
141862306a36Sopenharmony_ci		if (!next_epoch)
141962306a36Sopenharmony_ci			break;
142062306a36Sopenharmony_ci
142162306a36Sopenharmony_ci		epoch = next_epoch;
142262306a36Sopenharmony_ci	} while (1);
142362306a36Sopenharmony_ci
142462306a36Sopenharmony_ci	spin_unlock(&connection->epoch_lock);
142562306a36Sopenharmony_ci
142662306a36Sopenharmony_ci	return rv;
142762306a36Sopenharmony_ci}
142862306a36Sopenharmony_ci
142962306a36Sopenharmony_cistatic enum write_ordering_e
143062306a36Sopenharmony_cimax_allowed_wo(struct drbd_backing_dev *bdev, enum write_ordering_e wo)
143162306a36Sopenharmony_ci{
143262306a36Sopenharmony_ci	struct disk_conf *dc;
143362306a36Sopenharmony_ci
143462306a36Sopenharmony_ci	dc = rcu_dereference(bdev->disk_conf);
143562306a36Sopenharmony_ci
143662306a36Sopenharmony_ci	if (wo == WO_BDEV_FLUSH && !dc->disk_flushes)
143762306a36Sopenharmony_ci		wo = WO_DRAIN_IO;
143862306a36Sopenharmony_ci	if (wo == WO_DRAIN_IO && !dc->disk_drain)
143962306a36Sopenharmony_ci		wo = WO_NONE;
144062306a36Sopenharmony_ci
144162306a36Sopenharmony_ci	return wo;
144262306a36Sopenharmony_ci}
144362306a36Sopenharmony_ci
144462306a36Sopenharmony_ci/*
144562306a36Sopenharmony_ci * drbd_bump_write_ordering() - Fall back to an other write ordering method
144662306a36Sopenharmony_ci * @wo:		Write ordering method to try.
144762306a36Sopenharmony_ci */
144862306a36Sopenharmony_civoid drbd_bump_write_ordering(struct drbd_resource *resource, struct drbd_backing_dev *bdev,
144962306a36Sopenharmony_ci			      enum write_ordering_e wo)
145062306a36Sopenharmony_ci{
145162306a36Sopenharmony_ci	struct drbd_device *device;
145262306a36Sopenharmony_ci	enum write_ordering_e pwo;
145362306a36Sopenharmony_ci	int vnr;
145462306a36Sopenharmony_ci	static char *write_ordering_str[] = {
145562306a36Sopenharmony_ci		[WO_NONE] = "none",
145662306a36Sopenharmony_ci		[WO_DRAIN_IO] = "drain",
145762306a36Sopenharmony_ci		[WO_BDEV_FLUSH] = "flush",
145862306a36Sopenharmony_ci	};
145962306a36Sopenharmony_ci
146062306a36Sopenharmony_ci	pwo = resource->write_ordering;
146162306a36Sopenharmony_ci	if (wo != WO_BDEV_FLUSH)
146262306a36Sopenharmony_ci		wo = min(pwo, wo);
146362306a36Sopenharmony_ci	rcu_read_lock();
146462306a36Sopenharmony_ci	idr_for_each_entry(&resource->devices, device, vnr) {
146562306a36Sopenharmony_ci		if (get_ldev(device)) {
146662306a36Sopenharmony_ci			wo = max_allowed_wo(device->ldev, wo);
146762306a36Sopenharmony_ci			if (device->ldev == bdev)
146862306a36Sopenharmony_ci				bdev = NULL;
146962306a36Sopenharmony_ci			put_ldev(device);
147062306a36Sopenharmony_ci		}
147162306a36Sopenharmony_ci	}
147262306a36Sopenharmony_ci
147362306a36Sopenharmony_ci	if (bdev)
147462306a36Sopenharmony_ci		wo = max_allowed_wo(bdev, wo);
147562306a36Sopenharmony_ci
147662306a36Sopenharmony_ci	rcu_read_unlock();
147762306a36Sopenharmony_ci
147862306a36Sopenharmony_ci	resource->write_ordering = wo;
147962306a36Sopenharmony_ci	if (pwo != resource->write_ordering || wo == WO_BDEV_FLUSH)
148062306a36Sopenharmony_ci		drbd_info(resource, "Method to ensure write ordering: %s\n", write_ordering_str[resource->write_ordering]);
148162306a36Sopenharmony_ci}
148262306a36Sopenharmony_ci
/*
 * Mapping "discard" to ZEROOUT with UNMAP does not work for us:
 * Drivers have to "announce" q->limits.max_write_zeroes_sectors, or it
 * will directly go to fallback mode, submitting normal writes, and
 * never even try to UNMAP.
 *
 * And dm-thin does not do this (yet), mostly because in general it has
 * to assume that "skip_block_zeroing" is set.  See also:
 * https://www.mail-archive.com/dm-devel%40redhat.com/msg07965.html
 * https://www.redhat.com/archives/dm-devel/2018-January/msg00271.html
 *
 * We *may* ignore the discard-zeroes-data setting, if so configured.
 *
 * Assumption is that this "discard_zeroes_data=0" is only because the backend
 * may ignore partial unaligned discards.
 *
 * LVM/DM thin as of at least
 *   LVM version:     2.02.115(2)-RHEL7 (2015-01-28)
 *   Library version: 1.02.93-RHEL7 (2015-01-28)
 *   Driver version:  4.29.0
 * still behaves this way.
 *
 * For unaligned (wrt. alignment and granularity) or too small discards,
 * we zero-out the initial (and/or) trailing unaligned partial chunks,
 * but discard all the aligned full chunks.
 *
 * At least for LVM/DM thin, with skip_block_zeroing=false,
 * the result is effectively "discard_zeroes_data=1".
 */
/* flags: EE_TRIM|EE_ZEROOUT */
/*
 * drbd_issue_discard_or_zero_out() - discard and/or zero out a sector range
 * @device:	device whose backing device (device->ldev) is operated on;
 *		ldev is dereferenced unchecked here (presumably the caller
 *		holds a reference on it - confirm against callers).
 * @start:	first sector of the range.
 * @nr_sectors:	length of the range, in 512 byte sectors.
 * @flags:	EE_TRIM and/or EE_ZEROOUT.  Discards are only attempted for
 *		EE_TRIM without EE_ZEROOUT; everything else is zeroed out.
 *
 * The individual block layer calls are synchronous; their results are
 * or-ed together, so the specific error code is not preserved.
 *
 * Returns 0 if every issued operation succeeded, 1 otherwise.
 */
int drbd_issue_discard_or_zero_out(struct drbd_device *device, sector_t start, unsigned int nr_sectors, int flags)
{
	struct block_device *bdev = device->ldev->backing_bdev;
	sector_t tmp, nr;
	unsigned int max_discard_sectors, granularity;
	int alignment;
	int err = 0;

	/* Zero-out explicitly requested, or no discard requested at all. */
	if ((flags & EE_ZEROOUT) || !(flags & EE_TRIM))
		goto zero_out;

	/* Zero-sector (unknown) and one-sector granularities are the same.  */
	granularity = max(bdev_discard_granularity(bdev) >> 9, 1U);
	alignment = (bdev_discard_alignment(bdev) >> 9) % granularity;

	/* Cap a single discard at 1U << 22 sectors, and keep the chunk size
	 * a multiple of the granularity. */
	max_discard_sectors = min(bdev_max_discard_sectors(bdev), (1U << 22));
	max_discard_sectors -= max_discard_sectors % granularity;
	if (unlikely(!max_discard_sectors))
		goto zero_out;

	/* Too small to contain even one full chunk. */
	if (nr_sectors < granularity)
		goto zero_out;

	tmp = start;
	/* NOTE(review): sector_div() is used here for its result, the
	 * remainder (see the formula comment below); it presumably also
	 * modifies tmp in place, which is why tmp is reassigned afterwards. */
	if (sector_div(tmp, granularity) != alignment) {
		if (nr_sectors < 2*granularity)
			goto zero_out;
		/* start + gran - (start + gran - align) % gran */
		tmp = start + granularity - alignment;
		tmp = start + granularity - sector_div(tmp, granularity);

		/* Zero out the unaligned head, up to the next aligned sector. */
		nr = tmp - start;
		/* don't flag BLKDEV_ZERO_NOUNMAP, we don't know how many
		 * layers are below us, some may have smaller granularity */
		err |= blkdev_issue_zeroout(bdev, start, nr, GFP_NOIO, 0);
		nr_sectors -= nr;
		start = tmp;
	}
	/* Discard as many maximum sized chunks as fit. */
	while (nr_sectors >= max_discard_sectors) {
		err |= blkdev_issue_discard(bdev, start, max_discard_sectors,
					    GFP_NOIO);
		nr_sectors -= max_discard_sectors;
		start += max_discard_sectors;
	}
	if (nr_sectors) {
		/* max_discard_sectors is unsigned int (and a multiple of
		 * granularity, we made sure of that above already);
		 * nr is < max_discard_sectors;
		 * I don't need sector_div here, even though nr is sector_t */
		nr = nr_sectors;
		nr -= (unsigned int)nr % granularity;
		if (nr) {
			err |= blkdev_issue_discard(bdev, start, nr, GFP_NOIO);
			nr_sectors -= nr;
			start += nr;
		}
	}
 zero_out:
	/* Whatever is left (the whole range if we jumped here directly, else
	 * the unaligned tail) gets zeroed.  BLKDEV_ZERO_NOUNMAP is passed
	 * only when EE_TRIM is not set. */
	if (nr_sectors) {
		err |= blkdev_issue_zeroout(bdev, start, nr_sectors, GFP_NOIO,
				(flags & EE_TRIM) ? 0 : BLKDEV_ZERO_NOUNMAP);
	}
	return err != 0;
}
157762306a36Sopenharmony_ci
157862306a36Sopenharmony_cistatic bool can_do_reliable_discards(struct drbd_device *device)
157962306a36Sopenharmony_ci{
158062306a36Sopenharmony_ci	struct disk_conf *dc;
158162306a36Sopenharmony_ci	bool can_do;
158262306a36Sopenharmony_ci
158362306a36Sopenharmony_ci	if (!bdev_max_discard_sectors(device->ldev->backing_bdev))
158462306a36Sopenharmony_ci		return false;
158562306a36Sopenharmony_ci
158662306a36Sopenharmony_ci	rcu_read_lock();
158762306a36Sopenharmony_ci	dc = rcu_dereference(device->ldev->disk_conf);
158862306a36Sopenharmony_ci	can_do = dc->discard_zeroes_if_aligned;
158962306a36Sopenharmony_ci	rcu_read_unlock();
159062306a36Sopenharmony_ci	return can_do;
159162306a36Sopenharmony_ci}
159262306a36Sopenharmony_ci
159362306a36Sopenharmony_cistatic void drbd_issue_peer_discard_or_zero_out(struct drbd_device *device, struct drbd_peer_request *peer_req)
159462306a36Sopenharmony_ci{
159562306a36Sopenharmony_ci	/* If the backend cannot discard, or does not guarantee
159662306a36Sopenharmony_ci	 * read-back zeroes in discarded ranges, we fall back to
159762306a36Sopenharmony_ci	 * zero-out.  Unless configuration specifically requested
159862306a36Sopenharmony_ci	 * otherwise. */
159962306a36Sopenharmony_ci	if (!can_do_reliable_discards(device))
160062306a36Sopenharmony_ci		peer_req->flags |= EE_ZEROOUT;
160162306a36Sopenharmony_ci
160262306a36Sopenharmony_ci	if (drbd_issue_discard_or_zero_out(device, peer_req->i.sector,
160362306a36Sopenharmony_ci	    peer_req->i.size >> 9, peer_req->flags & (EE_ZEROOUT|EE_TRIM)))
160462306a36Sopenharmony_ci		peer_req->flags |= EE_WAS_ERROR;
160562306a36Sopenharmony_ci	drbd_endio_write_sec_final(peer_req);
160662306a36Sopenharmony_ci}
160762306a36Sopenharmony_ci
160862306a36Sopenharmony_cistatic int peer_request_fault_type(struct drbd_peer_request *peer_req)
160962306a36Sopenharmony_ci{
161062306a36Sopenharmony_ci	if (peer_req_op(peer_req) == REQ_OP_READ) {
161162306a36Sopenharmony_ci		return peer_req->flags & EE_APPLICATION ?
161262306a36Sopenharmony_ci			DRBD_FAULT_DT_RD : DRBD_FAULT_RS_RD;
161362306a36Sopenharmony_ci	} else {
161462306a36Sopenharmony_ci		return peer_req->flags & EE_APPLICATION ?
161562306a36Sopenharmony_ci			DRBD_FAULT_DT_WR : DRBD_FAULT_RS_WR;
161662306a36Sopenharmony_ci	}
161762306a36Sopenharmony_ci}
161862306a36Sopenharmony_ci
/**
 * drbd_submit_peer_request()
 * @peer_req:	peer request
 *
 * May spread the pages to multiple bios,
 * depending on bio_add_page restrictions.
 *
 * Discard / zero-out requests (EE_TRIM, EE_ZEROOUT) are not turned into bios
 * here; they are executed synchronously via
 * drbd_issue_peer_discard_or_zero_out() and 0 is returned.
 *
 * Returns 0 if all bios have been submitted (or the discard / zero-out path
 * was taken), -EINVAL if the request's operation is neither REQ_OP_WRITE nor
 * REQ_OP_READ.
 *
 * Note: earlier documentation of this function listed -ENOMEM (could not
 * allocate enough bios) and -ENOSPC (could not bio_add_page a single page to
 * an empty bio, observed on certain Xen deployments) as return values; the
 * code below returns neither.
 */
/* TODO allocate from our own bio_set. */
int drbd_submit_peer_request(struct drbd_peer_request *peer_req)
{
	struct drbd_device *device = peer_req->peer_device->device;
	struct bio *bios = NULL;	/* chain of prepared bios, linked via bi_next */
	struct bio *bio;
	struct page *page = peer_req->pages;
	sector_t sector = peer_req->i.sector;
	unsigned int data_size = peer_req->i.size;
	unsigned int n_bios = 0;
	unsigned int nr_pages = PFN_UP(data_size);

	/* TRIM/DISCARD: for now, always use the helper function
	 * blkdev_issue_zeroout(..., discard=true).
	 * It's synchronous, but it does the right thing wrt. bio splitting.
	 * Correctness first, performance later.  Next step is to code an
	 * asynchronous variant of the same.
	 */
	if (peer_req->flags & (EE_TRIM | EE_ZEROOUT)) {
		/* wait for all pending IO completions, before we start
		 * zeroing things out. */
		conn_wait_active_ee_empty(peer_req->peer_device->connection);
		/* add it to the active list now,
		 * so we can find it to present it in debugfs */
		peer_req->submit_jif = jiffies;
		peer_req->flags |= EE_SUBMITTED;

		/* If this was a resync request from receive_rs_deallocated(),
		 * it is already on the sync_ee list */
		if (list_empty(&peer_req->w.list)) {
			spin_lock_irq(&device->resource->req_lock);
			list_add_tail(&peer_req->w.list, &device->active_ee);
			spin_unlock_irq(&device->resource->req_lock);
		}

		drbd_issue_peer_discard_or_zero_out(device, peer_req);
		return 0;
	}

	/* In most cases, we will only need one bio.  But in case the lower
	 * level restrictions happen to be different at this offset on this
	 * side than those of the sending peer, we may need to submit the
	 * request in more than one bio.
	 *
	 * Plain bio_alloc is good enough here, this is no DRBD internally
	 * generated bio, but a bio allocated on behalf of the peer.
	 */
next_bio:
	/* _DISCARD, _WRITE_ZEROES handled above.
	 * REQ_OP_FLUSH (empty flush) not expected,
	 * should have been mapped to a "drbd protocol barrier".
	 * REQ_OP_SECURE_ERASE: I don't see how we could ever support that.
	 */
	if (!(peer_req_op(peer_req) == REQ_OP_WRITE ||
				peer_req_op(peer_req) == REQ_OP_READ)) {
		drbd_err(device, "Invalid bio op received: 0x%x\n", peer_req->opf);
		return -EINVAL;
	}

	/* The result of bio_alloc() is used without a NULL check. */
	bio = bio_alloc(device->ldev->backing_bdev, nr_pages, peer_req->opf, GFP_NOIO);
	/* > peer_req->i.sector, unless this is the first bio */
	bio->bi_iter.bi_sector = sector;
	bio->bi_private = peer_req;
	bio->bi_end_io = drbd_peer_request_endio;

	/* Push onto the front of the chain; nothing is submitted yet. */
	bio->bi_next = bios;
	bios = bio;
	++n_bios;

	page_chain_for_each(page) {
		unsigned len = min_t(unsigned, data_size, PAGE_SIZE);
		/* This bio does not take the page: start another bio.  page,
		 * sector, data_size and nr_pages are not reset at next_bio,
		 * so the new bio continues with this very page. */
		if (!bio_add_page(bio, page, len, 0))
			goto next_bio;
		data_size -= len;
		sector += len >> 9;
		--nr_pages;
	}
	D_ASSERT(device, data_size == 0);
	D_ASSERT(device, page == NULL);

	/* Set the number of outstanding bios before the first one is submitted. */
	atomic_set(&peer_req->pending_bios, n_bios);
	/* for debugfs: update timestamp, mark as submitted */
	peer_req->submit_jif = jiffies;
	peer_req->flags |= EE_SUBMITTED;
	do {
		/* Detach the head of the chain, then submit it. */
		bio = bios;
		bios = bios->bi_next;
		bio->bi_next = NULL;

		drbd_submit_bio_noacct(device, peer_request_fault_type(peer_req), bio);
	} while (bios);
	return 0;
}
172662306a36Sopenharmony_ci
172762306a36Sopenharmony_cistatic void drbd_remove_epoch_entry_interval(struct drbd_device *device,
172862306a36Sopenharmony_ci					     struct drbd_peer_request *peer_req)
172962306a36Sopenharmony_ci{
173062306a36Sopenharmony_ci	struct drbd_interval *i = &peer_req->i;
173162306a36Sopenharmony_ci
173262306a36Sopenharmony_ci	drbd_remove_interval(&device->write_requests, i);
173362306a36Sopenharmony_ci	drbd_clear_interval(i);
173462306a36Sopenharmony_ci
173562306a36Sopenharmony_ci	/* Wake up any processes waiting for this peer request to complete.  */
173662306a36Sopenharmony_ci	if (i->waiting)
173762306a36Sopenharmony_ci		wake_up(&device->misc_wait);
173862306a36Sopenharmony_ci}
173962306a36Sopenharmony_ci
174062306a36Sopenharmony_cistatic void conn_wait_active_ee_empty(struct drbd_connection *connection)
174162306a36Sopenharmony_ci{
174262306a36Sopenharmony_ci	struct drbd_peer_device *peer_device;
174362306a36Sopenharmony_ci	int vnr;
174462306a36Sopenharmony_ci
174562306a36Sopenharmony_ci	rcu_read_lock();
174662306a36Sopenharmony_ci	idr_for_each_entry(&connection->peer_devices, peer_device, vnr) {
174762306a36Sopenharmony_ci		struct drbd_device *device = peer_device->device;
174862306a36Sopenharmony_ci
174962306a36Sopenharmony_ci		kref_get(&device->kref);
175062306a36Sopenharmony_ci		rcu_read_unlock();
175162306a36Sopenharmony_ci		drbd_wait_ee_list_empty(device, &device->active_ee);
175262306a36Sopenharmony_ci		kref_put(&device->kref, drbd_destroy_device);
175362306a36Sopenharmony_ci		rcu_read_lock();
175462306a36Sopenharmony_ci	}
175562306a36Sopenharmony_ci	rcu_read_unlock();
175662306a36Sopenharmony_ci}
175762306a36Sopenharmony_ci
175862306a36Sopenharmony_cistatic int receive_Barrier(struct drbd_connection *connection, struct packet_info *pi)
175962306a36Sopenharmony_ci{
176062306a36Sopenharmony_ci	int rv;
176162306a36Sopenharmony_ci	struct p_barrier *p = pi->data;
176262306a36Sopenharmony_ci	struct drbd_epoch *epoch;
176362306a36Sopenharmony_ci
176462306a36Sopenharmony_ci	/* FIXME these are unacked on connection,
176562306a36Sopenharmony_ci	 * not a specific (peer)device.
176662306a36Sopenharmony_ci	 */
176762306a36Sopenharmony_ci	connection->current_epoch->barrier_nr = p->barrier;
176862306a36Sopenharmony_ci	connection->current_epoch->connection = connection;
176962306a36Sopenharmony_ci	rv = drbd_may_finish_epoch(connection, connection->current_epoch, EV_GOT_BARRIER_NR);
177062306a36Sopenharmony_ci
177162306a36Sopenharmony_ci	/* P_BARRIER_ACK may imply that the corresponding extent is dropped from
177262306a36Sopenharmony_ci	 * the activity log, which means it would not be resynced in case the
177362306a36Sopenharmony_ci	 * R_PRIMARY crashes now.
177462306a36Sopenharmony_ci	 * Therefore we must send the barrier_ack after the barrier request was
177562306a36Sopenharmony_ci	 * completed. */
177662306a36Sopenharmony_ci	switch (connection->resource->write_ordering) {
177762306a36Sopenharmony_ci	case WO_NONE:
177862306a36Sopenharmony_ci		if (rv == FE_RECYCLED)
177962306a36Sopenharmony_ci			return 0;
178062306a36Sopenharmony_ci
178162306a36Sopenharmony_ci		/* receiver context, in the writeout path of the other node.
178262306a36Sopenharmony_ci		 * avoid potential distributed deadlock */
178362306a36Sopenharmony_ci		epoch = kmalloc(sizeof(struct drbd_epoch), GFP_NOIO);
178462306a36Sopenharmony_ci		if (epoch)
178562306a36Sopenharmony_ci			break;
178662306a36Sopenharmony_ci		else
178762306a36Sopenharmony_ci			drbd_warn(connection, "Allocation of an epoch failed, slowing down\n");
178862306a36Sopenharmony_ci		fallthrough;
178962306a36Sopenharmony_ci
179062306a36Sopenharmony_ci	case WO_BDEV_FLUSH:
179162306a36Sopenharmony_ci	case WO_DRAIN_IO:
179262306a36Sopenharmony_ci		conn_wait_active_ee_empty(connection);
179362306a36Sopenharmony_ci		drbd_flush(connection);
179462306a36Sopenharmony_ci
179562306a36Sopenharmony_ci		if (atomic_read(&connection->current_epoch->epoch_size)) {
179662306a36Sopenharmony_ci			epoch = kmalloc(sizeof(struct drbd_epoch), GFP_NOIO);
179762306a36Sopenharmony_ci			if (epoch)
179862306a36Sopenharmony_ci				break;
179962306a36Sopenharmony_ci		}
180062306a36Sopenharmony_ci
180162306a36Sopenharmony_ci		return 0;
180262306a36Sopenharmony_ci	default:
180362306a36Sopenharmony_ci		drbd_err(connection, "Strangeness in connection->write_ordering %d\n",
180462306a36Sopenharmony_ci			 connection->resource->write_ordering);
180562306a36Sopenharmony_ci		return -EIO;
180662306a36Sopenharmony_ci	}
180762306a36Sopenharmony_ci
180862306a36Sopenharmony_ci	epoch->flags = 0;
180962306a36Sopenharmony_ci	atomic_set(&epoch->epoch_size, 0);
181062306a36Sopenharmony_ci	atomic_set(&epoch->active, 0);
181162306a36Sopenharmony_ci
181262306a36Sopenharmony_ci	spin_lock(&connection->epoch_lock);
181362306a36Sopenharmony_ci	if (atomic_read(&connection->current_epoch->epoch_size)) {
181462306a36Sopenharmony_ci		list_add(&epoch->list, &connection->current_epoch->list);
181562306a36Sopenharmony_ci		connection->current_epoch = epoch;
181662306a36Sopenharmony_ci		connection->epochs++;
181762306a36Sopenharmony_ci	} else {
181862306a36Sopenharmony_ci		/* The current_epoch got recycled while we allocated this one... */
181962306a36Sopenharmony_ci		kfree(epoch);
182062306a36Sopenharmony_ci	}
182162306a36Sopenharmony_ci	spin_unlock(&connection->epoch_lock);
182262306a36Sopenharmony_ci
182362306a36Sopenharmony_ci	return 0;
182462306a36Sopenharmony_ci}
182562306a36Sopenharmony_ci
182662306a36Sopenharmony_ci/* quick wrapper in case payload size != request_size (write same) */
182762306a36Sopenharmony_cistatic void drbd_csum_ee_size(struct crypto_shash *h,
182862306a36Sopenharmony_ci			      struct drbd_peer_request *r, void *d,
182962306a36Sopenharmony_ci			      unsigned int payload_size)
183062306a36Sopenharmony_ci{
183162306a36Sopenharmony_ci	unsigned int tmp = r->i.size;
183262306a36Sopenharmony_ci	r->i.size = payload_size;
183362306a36Sopenharmony_ci	drbd_csum_ee(h, r, d);
183462306a36Sopenharmony_ci	r->i.size = tmp;
183562306a36Sopenharmony_ci}
183662306a36Sopenharmony_ci
183762306a36Sopenharmony_ci/* used from receive_RSDataReply (recv_resync_read)
183862306a36Sopenharmony_ci * and from receive_Data.
183962306a36Sopenharmony_ci * data_size: actual payload ("data in")
184062306a36Sopenharmony_ci * 	for normal writes that is bi_size.
184162306a36Sopenharmony_ci * 	for discards, that is zero.
184262306a36Sopenharmony_ci * 	for write same, it is logical_block_size.
184362306a36Sopenharmony_ci * both trim and write same have the bi_size ("data len to be affected")
184462306a36Sopenharmony_ci * as extra argument in the packet header.
184562306a36Sopenharmony_ci */
184662306a36Sopenharmony_cistatic struct drbd_peer_request *
184762306a36Sopenharmony_ciread_in_block(struct drbd_peer_device *peer_device, u64 id, sector_t sector,
184862306a36Sopenharmony_ci	      struct packet_info *pi) __must_hold(local)
184962306a36Sopenharmony_ci{
185062306a36Sopenharmony_ci	struct drbd_device *device = peer_device->device;
185162306a36Sopenharmony_ci	const sector_t capacity = get_capacity(device->vdisk);
185262306a36Sopenharmony_ci	struct drbd_peer_request *peer_req;
185362306a36Sopenharmony_ci	struct page *page;
185462306a36Sopenharmony_ci	int digest_size, err;
185562306a36Sopenharmony_ci	unsigned int data_size = pi->size, ds;
185662306a36Sopenharmony_ci	void *dig_in = peer_device->connection->int_dig_in;
185762306a36Sopenharmony_ci	void *dig_vv = peer_device->connection->int_dig_vv;
185862306a36Sopenharmony_ci	unsigned long *data;
185962306a36Sopenharmony_ci	struct p_trim *trim = (pi->cmd == P_TRIM) ? pi->data : NULL;
186062306a36Sopenharmony_ci	struct p_trim *zeroes = (pi->cmd == P_ZEROES) ? pi->data : NULL;
186162306a36Sopenharmony_ci
186262306a36Sopenharmony_ci	digest_size = 0;
186362306a36Sopenharmony_ci	if (!trim && peer_device->connection->peer_integrity_tfm) {
186462306a36Sopenharmony_ci		digest_size = crypto_shash_digestsize(peer_device->connection->peer_integrity_tfm);
186562306a36Sopenharmony_ci		/*
186662306a36Sopenharmony_ci		 * FIXME: Receive the incoming digest into the receive buffer
186762306a36Sopenharmony_ci		 *	  here, together with its struct p_data?
186862306a36Sopenharmony_ci		 */
186962306a36Sopenharmony_ci		err = drbd_recv_all_warn(peer_device->connection, dig_in, digest_size);
187062306a36Sopenharmony_ci		if (err)
187162306a36Sopenharmony_ci			return NULL;
187262306a36Sopenharmony_ci		data_size -= digest_size;
187362306a36Sopenharmony_ci	}
187462306a36Sopenharmony_ci
187562306a36Sopenharmony_ci	/* assume request_size == data_size, but special case trim. */
187662306a36Sopenharmony_ci	ds = data_size;
187762306a36Sopenharmony_ci	if (trim) {
187862306a36Sopenharmony_ci		if (!expect(peer_device, data_size == 0))
187962306a36Sopenharmony_ci			return NULL;
188062306a36Sopenharmony_ci		ds = be32_to_cpu(trim->size);
188162306a36Sopenharmony_ci	} else if (zeroes) {
188262306a36Sopenharmony_ci		if (!expect(peer_device, data_size == 0))
188362306a36Sopenharmony_ci			return NULL;
188462306a36Sopenharmony_ci		ds = be32_to_cpu(zeroes->size);
188562306a36Sopenharmony_ci	}
188662306a36Sopenharmony_ci
188762306a36Sopenharmony_ci	if (!expect(peer_device, IS_ALIGNED(ds, 512)))
188862306a36Sopenharmony_ci		return NULL;
188962306a36Sopenharmony_ci	if (trim || zeroes) {
189062306a36Sopenharmony_ci		if (!expect(peer_device, ds <= (DRBD_MAX_BBIO_SECTORS << 9)))
189162306a36Sopenharmony_ci			return NULL;
189262306a36Sopenharmony_ci	} else if (!expect(peer_device, ds <= DRBD_MAX_BIO_SIZE))
189362306a36Sopenharmony_ci		return NULL;
189462306a36Sopenharmony_ci
189562306a36Sopenharmony_ci	/* even though we trust out peer,
189662306a36Sopenharmony_ci	 * we sometimes have to double check. */
189762306a36Sopenharmony_ci	if (sector + (ds>>9) > capacity) {
189862306a36Sopenharmony_ci		drbd_err(device, "request from peer beyond end of local disk: "
189962306a36Sopenharmony_ci			"capacity: %llus < sector: %llus + size: %u\n",
190062306a36Sopenharmony_ci			(unsigned long long)capacity,
190162306a36Sopenharmony_ci			(unsigned long long)sector, ds);
190262306a36Sopenharmony_ci		return NULL;
190362306a36Sopenharmony_ci	}
190462306a36Sopenharmony_ci
190562306a36Sopenharmony_ci	/* GFP_NOIO, because we must not cause arbitrary write-out: in a DRBD
190662306a36Sopenharmony_ci	 * "criss-cross" setup, that might cause write-out on some other DRBD,
190762306a36Sopenharmony_ci	 * which in turn might block on the other node at this very place.  */
190862306a36Sopenharmony_ci	peer_req = drbd_alloc_peer_req(peer_device, id, sector, ds, data_size, GFP_NOIO);
190962306a36Sopenharmony_ci	if (!peer_req)
191062306a36Sopenharmony_ci		return NULL;
191162306a36Sopenharmony_ci
191262306a36Sopenharmony_ci	peer_req->flags |= EE_WRITE;
191362306a36Sopenharmony_ci	if (trim) {
191462306a36Sopenharmony_ci		peer_req->flags |= EE_TRIM;
191562306a36Sopenharmony_ci		return peer_req;
191662306a36Sopenharmony_ci	}
191762306a36Sopenharmony_ci	if (zeroes) {
191862306a36Sopenharmony_ci		peer_req->flags |= EE_ZEROOUT;
191962306a36Sopenharmony_ci		return peer_req;
192062306a36Sopenharmony_ci	}
192162306a36Sopenharmony_ci
192262306a36Sopenharmony_ci	/* receive payload size bytes into page chain */
192362306a36Sopenharmony_ci	ds = data_size;
192462306a36Sopenharmony_ci	page = peer_req->pages;
192562306a36Sopenharmony_ci	page_chain_for_each(page) {
192662306a36Sopenharmony_ci		unsigned len = min_t(int, ds, PAGE_SIZE);
192762306a36Sopenharmony_ci		data = kmap(page);
192862306a36Sopenharmony_ci		err = drbd_recv_all_warn(peer_device->connection, data, len);
192962306a36Sopenharmony_ci		if (drbd_insert_fault(device, DRBD_FAULT_RECEIVE)) {
193062306a36Sopenharmony_ci			drbd_err(device, "Fault injection: Corrupting data on receive\n");
193162306a36Sopenharmony_ci			data[0] = data[0] ^ (unsigned long)-1;
193262306a36Sopenharmony_ci		}
193362306a36Sopenharmony_ci		kunmap(page);
193462306a36Sopenharmony_ci		if (err) {
193562306a36Sopenharmony_ci			drbd_free_peer_req(device, peer_req);
193662306a36Sopenharmony_ci			return NULL;
193762306a36Sopenharmony_ci		}
193862306a36Sopenharmony_ci		ds -= len;
193962306a36Sopenharmony_ci	}
194062306a36Sopenharmony_ci
194162306a36Sopenharmony_ci	if (digest_size) {
194262306a36Sopenharmony_ci		drbd_csum_ee_size(peer_device->connection->peer_integrity_tfm, peer_req, dig_vv, data_size);
194362306a36Sopenharmony_ci		if (memcmp(dig_in, dig_vv, digest_size)) {
194462306a36Sopenharmony_ci			drbd_err(device, "Digest integrity check FAILED: %llus +%u\n",
194562306a36Sopenharmony_ci				(unsigned long long)sector, data_size);
194662306a36Sopenharmony_ci			drbd_free_peer_req(device, peer_req);
194762306a36Sopenharmony_ci			return NULL;
194862306a36Sopenharmony_ci		}
194962306a36Sopenharmony_ci	}
195062306a36Sopenharmony_ci	device->recv_cnt += data_size >> 9;
195162306a36Sopenharmony_ci	return peer_req;
195262306a36Sopenharmony_ci}
195362306a36Sopenharmony_ci
195462306a36Sopenharmony_ci/* drbd_drain_block() just takes a data block
195562306a36Sopenharmony_ci * out of the socket input buffer, and discards it.
195662306a36Sopenharmony_ci */
195762306a36Sopenharmony_cistatic int drbd_drain_block(struct drbd_peer_device *peer_device, int data_size)
195862306a36Sopenharmony_ci{
195962306a36Sopenharmony_ci	struct page *page;
196062306a36Sopenharmony_ci	int err = 0;
196162306a36Sopenharmony_ci	void *data;
196262306a36Sopenharmony_ci
196362306a36Sopenharmony_ci	if (!data_size)
196462306a36Sopenharmony_ci		return 0;
196562306a36Sopenharmony_ci
196662306a36Sopenharmony_ci	page = drbd_alloc_pages(peer_device, 1, 1);
196762306a36Sopenharmony_ci
196862306a36Sopenharmony_ci	data = kmap(page);
196962306a36Sopenharmony_ci	while (data_size) {
197062306a36Sopenharmony_ci		unsigned int len = min_t(int, data_size, PAGE_SIZE);
197162306a36Sopenharmony_ci
197262306a36Sopenharmony_ci		err = drbd_recv_all_warn(peer_device->connection, data, len);
197362306a36Sopenharmony_ci		if (err)
197462306a36Sopenharmony_ci			break;
197562306a36Sopenharmony_ci		data_size -= len;
197662306a36Sopenharmony_ci	}
197762306a36Sopenharmony_ci	kunmap(page);
197862306a36Sopenharmony_ci	drbd_free_pages(peer_device->device, page, 0);
197962306a36Sopenharmony_ci	return err;
198062306a36Sopenharmony_ci}
198162306a36Sopenharmony_ci
198262306a36Sopenharmony_cistatic int recv_dless_read(struct drbd_peer_device *peer_device, struct drbd_request *req,
198362306a36Sopenharmony_ci			   sector_t sector, int data_size)
198462306a36Sopenharmony_ci{
198562306a36Sopenharmony_ci	struct bio_vec bvec;
198662306a36Sopenharmony_ci	struct bvec_iter iter;
198762306a36Sopenharmony_ci	struct bio *bio;
198862306a36Sopenharmony_ci	int digest_size, err, expect;
198962306a36Sopenharmony_ci	void *dig_in = peer_device->connection->int_dig_in;
199062306a36Sopenharmony_ci	void *dig_vv = peer_device->connection->int_dig_vv;
199162306a36Sopenharmony_ci
199262306a36Sopenharmony_ci	digest_size = 0;
199362306a36Sopenharmony_ci	if (peer_device->connection->peer_integrity_tfm) {
199462306a36Sopenharmony_ci		digest_size = crypto_shash_digestsize(peer_device->connection->peer_integrity_tfm);
199562306a36Sopenharmony_ci		err = drbd_recv_all_warn(peer_device->connection, dig_in, digest_size);
199662306a36Sopenharmony_ci		if (err)
199762306a36Sopenharmony_ci			return err;
199862306a36Sopenharmony_ci		data_size -= digest_size;
199962306a36Sopenharmony_ci	}
200062306a36Sopenharmony_ci
200162306a36Sopenharmony_ci	/* optimistically update recv_cnt.  if receiving fails below,
200262306a36Sopenharmony_ci	 * we disconnect anyways, and counters will be reset. */
200362306a36Sopenharmony_ci	peer_device->device->recv_cnt += data_size>>9;
200462306a36Sopenharmony_ci
200562306a36Sopenharmony_ci	bio = req->master_bio;
200662306a36Sopenharmony_ci	D_ASSERT(peer_device->device, sector == bio->bi_iter.bi_sector);
200762306a36Sopenharmony_ci
200862306a36Sopenharmony_ci	bio_for_each_segment(bvec, bio, iter) {
200962306a36Sopenharmony_ci		void *mapped = bvec_kmap_local(&bvec);
201062306a36Sopenharmony_ci		expect = min_t(int, data_size, bvec.bv_len);
201162306a36Sopenharmony_ci		err = drbd_recv_all_warn(peer_device->connection, mapped, expect);
201262306a36Sopenharmony_ci		kunmap_local(mapped);
201362306a36Sopenharmony_ci		if (err)
201462306a36Sopenharmony_ci			return err;
201562306a36Sopenharmony_ci		data_size -= expect;
201662306a36Sopenharmony_ci	}
201762306a36Sopenharmony_ci
201862306a36Sopenharmony_ci	if (digest_size) {
201962306a36Sopenharmony_ci		drbd_csum_bio(peer_device->connection->peer_integrity_tfm, bio, dig_vv);
202062306a36Sopenharmony_ci		if (memcmp(dig_in, dig_vv, digest_size)) {
202162306a36Sopenharmony_ci			drbd_err(peer_device, "Digest integrity check FAILED. Broken NICs?\n");
202262306a36Sopenharmony_ci			return -EINVAL;
202362306a36Sopenharmony_ci		}
202462306a36Sopenharmony_ci	}
202562306a36Sopenharmony_ci
202662306a36Sopenharmony_ci	D_ASSERT(peer_device->device, data_size == 0);
202762306a36Sopenharmony_ci	return 0;
202862306a36Sopenharmony_ci}
202962306a36Sopenharmony_ci
203062306a36Sopenharmony_ci/*
203162306a36Sopenharmony_ci * e_end_resync_block() is called in ack_sender context via
203262306a36Sopenharmony_ci * drbd_finish_peer_reqs().
203362306a36Sopenharmony_ci */
203462306a36Sopenharmony_cistatic int e_end_resync_block(struct drbd_work *w, int unused)
203562306a36Sopenharmony_ci{
203662306a36Sopenharmony_ci	struct drbd_peer_request *peer_req =
203762306a36Sopenharmony_ci		container_of(w, struct drbd_peer_request, w);
203862306a36Sopenharmony_ci	struct drbd_peer_device *peer_device = peer_req->peer_device;
203962306a36Sopenharmony_ci	struct drbd_device *device = peer_device->device;
204062306a36Sopenharmony_ci	sector_t sector = peer_req->i.sector;
204162306a36Sopenharmony_ci	int err;
204262306a36Sopenharmony_ci
204362306a36Sopenharmony_ci	D_ASSERT(device, drbd_interval_empty(&peer_req->i));
204462306a36Sopenharmony_ci
204562306a36Sopenharmony_ci	if (likely((peer_req->flags & EE_WAS_ERROR) == 0)) {
204662306a36Sopenharmony_ci		drbd_set_in_sync(peer_device, sector, peer_req->i.size);
204762306a36Sopenharmony_ci		err = drbd_send_ack(peer_device, P_RS_WRITE_ACK, peer_req);
204862306a36Sopenharmony_ci	} else {
204962306a36Sopenharmony_ci		/* Record failure to sync */
205062306a36Sopenharmony_ci		drbd_rs_failed_io(peer_device, sector, peer_req->i.size);
205162306a36Sopenharmony_ci
205262306a36Sopenharmony_ci		err  = drbd_send_ack(peer_device, P_NEG_ACK, peer_req);
205362306a36Sopenharmony_ci	}
205462306a36Sopenharmony_ci	dec_unacked(device);
205562306a36Sopenharmony_ci
205662306a36Sopenharmony_ci	return err;
205762306a36Sopenharmony_ci}
205862306a36Sopenharmony_ci
205962306a36Sopenharmony_cistatic int recv_resync_read(struct drbd_peer_device *peer_device, sector_t sector,
206062306a36Sopenharmony_ci			    struct packet_info *pi) __releases(local)
206162306a36Sopenharmony_ci{
206262306a36Sopenharmony_ci	struct drbd_device *device = peer_device->device;
206362306a36Sopenharmony_ci	struct drbd_peer_request *peer_req;
206462306a36Sopenharmony_ci
206562306a36Sopenharmony_ci	peer_req = read_in_block(peer_device, ID_SYNCER, sector, pi);
206662306a36Sopenharmony_ci	if (!peer_req)
206762306a36Sopenharmony_ci		goto fail;
206862306a36Sopenharmony_ci
206962306a36Sopenharmony_ci	dec_rs_pending(peer_device);
207062306a36Sopenharmony_ci
207162306a36Sopenharmony_ci	inc_unacked(device);
207262306a36Sopenharmony_ci	/* corresponding dec_unacked() in e_end_resync_block()
207362306a36Sopenharmony_ci	 * respective _drbd_clear_done_ee */
207462306a36Sopenharmony_ci
207562306a36Sopenharmony_ci	peer_req->w.cb = e_end_resync_block;
207662306a36Sopenharmony_ci	peer_req->opf = REQ_OP_WRITE;
207762306a36Sopenharmony_ci	peer_req->submit_jif = jiffies;
207862306a36Sopenharmony_ci
207962306a36Sopenharmony_ci	spin_lock_irq(&device->resource->req_lock);
208062306a36Sopenharmony_ci	list_add_tail(&peer_req->w.list, &device->sync_ee);
208162306a36Sopenharmony_ci	spin_unlock_irq(&device->resource->req_lock);
208262306a36Sopenharmony_ci
208362306a36Sopenharmony_ci	atomic_add(pi->size >> 9, &device->rs_sect_ev);
208462306a36Sopenharmony_ci	if (drbd_submit_peer_request(peer_req) == 0)
208562306a36Sopenharmony_ci		return 0;
208662306a36Sopenharmony_ci
208762306a36Sopenharmony_ci	/* don't care for the reason here */
208862306a36Sopenharmony_ci	drbd_err(device, "submit failed, triggering re-connect\n");
208962306a36Sopenharmony_ci	spin_lock_irq(&device->resource->req_lock);
209062306a36Sopenharmony_ci	list_del(&peer_req->w.list);
209162306a36Sopenharmony_ci	spin_unlock_irq(&device->resource->req_lock);
209262306a36Sopenharmony_ci
209362306a36Sopenharmony_ci	drbd_free_peer_req(device, peer_req);
209462306a36Sopenharmony_cifail:
209562306a36Sopenharmony_ci	put_ldev(device);
209662306a36Sopenharmony_ci	return -EIO;
209762306a36Sopenharmony_ci}
209862306a36Sopenharmony_ci
209962306a36Sopenharmony_cistatic struct drbd_request *
210062306a36Sopenharmony_cifind_request(struct drbd_device *device, struct rb_root *root, u64 id,
210162306a36Sopenharmony_ci	     sector_t sector, bool missing_ok, const char *func)
210262306a36Sopenharmony_ci{
210362306a36Sopenharmony_ci	struct drbd_request *req;
210462306a36Sopenharmony_ci
210562306a36Sopenharmony_ci	/* Request object according to our peer */
210662306a36Sopenharmony_ci	req = (struct drbd_request *)(unsigned long)id;
210762306a36Sopenharmony_ci	if (drbd_contains_interval(root, sector, &req->i) && req->i.local)
210862306a36Sopenharmony_ci		return req;
210962306a36Sopenharmony_ci	if (!missing_ok) {
211062306a36Sopenharmony_ci		drbd_err(device, "%s: failed to find request 0x%lx, sector %llus\n", func,
211162306a36Sopenharmony_ci			(unsigned long)id, (unsigned long long)sector);
211262306a36Sopenharmony_ci	}
211362306a36Sopenharmony_ci	return NULL;
211462306a36Sopenharmony_ci}
211562306a36Sopenharmony_ci
211662306a36Sopenharmony_cistatic int receive_DataReply(struct drbd_connection *connection, struct packet_info *pi)
211762306a36Sopenharmony_ci{
211862306a36Sopenharmony_ci	struct drbd_peer_device *peer_device;
211962306a36Sopenharmony_ci	struct drbd_device *device;
212062306a36Sopenharmony_ci	struct drbd_request *req;
212162306a36Sopenharmony_ci	sector_t sector;
212262306a36Sopenharmony_ci	int err;
212362306a36Sopenharmony_ci	struct p_data *p = pi->data;
212462306a36Sopenharmony_ci
212562306a36Sopenharmony_ci	peer_device = conn_peer_device(connection, pi->vnr);
212662306a36Sopenharmony_ci	if (!peer_device)
212762306a36Sopenharmony_ci		return -EIO;
212862306a36Sopenharmony_ci	device = peer_device->device;
212962306a36Sopenharmony_ci
213062306a36Sopenharmony_ci	sector = be64_to_cpu(p->sector);
213162306a36Sopenharmony_ci
213262306a36Sopenharmony_ci	spin_lock_irq(&device->resource->req_lock);
213362306a36Sopenharmony_ci	req = find_request(device, &device->read_requests, p->block_id, sector, false, __func__);
213462306a36Sopenharmony_ci	spin_unlock_irq(&device->resource->req_lock);
213562306a36Sopenharmony_ci	if (unlikely(!req))
213662306a36Sopenharmony_ci		return -EIO;
213762306a36Sopenharmony_ci
213862306a36Sopenharmony_ci	err = recv_dless_read(peer_device, req, sector, pi->size);
213962306a36Sopenharmony_ci	if (!err)
214062306a36Sopenharmony_ci		req_mod(req, DATA_RECEIVED, peer_device);
214162306a36Sopenharmony_ci	/* else: nothing. handled from drbd_disconnect...
214262306a36Sopenharmony_ci	 * I don't think we may complete this just yet
214362306a36Sopenharmony_ci	 * in case we are "on-disconnect: freeze" */
214462306a36Sopenharmony_ci
214562306a36Sopenharmony_ci	return err;
214662306a36Sopenharmony_ci}
214762306a36Sopenharmony_ci
214862306a36Sopenharmony_cistatic int receive_RSDataReply(struct drbd_connection *connection, struct packet_info *pi)
214962306a36Sopenharmony_ci{
215062306a36Sopenharmony_ci	struct drbd_peer_device *peer_device;
215162306a36Sopenharmony_ci	struct drbd_device *device;
215262306a36Sopenharmony_ci	sector_t sector;
215362306a36Sopenharmony_ci	int err;
215462306a36Sopenharmony_ci	struct p_data *p = pi->data;
215562306a36Sopenharmony_ci
215662306a36Sopenharmony_ci	peer_device = conn_peer_device(connection, pi->vnr);
215762306a36Sopenharmony_ci	if (!peer_device)
215862306a36Sopenharmony_ci		return -EIO;
215962306a36Sopenharmony_ci	device = peer_device->device;
216062306a36Sopenharmony_ci
216162306a36Sopenharmony_ci	sector = be64_to_cpu(p->sector);
216262306a36Sopenharmony_ci	D_ASSERT(device, p->block_id == ID_SYNCER);
216362306a36Sopenharmony_ci
216462306a36Sopenharmony_ci	if (get_ldev(device)) {
216562306a36Sopenharmony_ci		/* data is submitted to disk within recv_resync_read.
216662306a36Sopenharmony_ci		 * corresponding put_ldev done below on error,
216762306a36Sopenharmony_ci		 * or in drbd_peer_request_endio. */
216862306a36Sopenharmony_ci		err = recv_resync_read(peer_device, sector, pi);
216962306a36Sopenharmony_ci	} else {
217062306a36Sopenharmony_ci		if (drbd_ratelimit())
217162306a36Sopenharmony_ci			drbd_err(device, "Can not write resync data to local disk.\n");
217262306a36Sopenharmony_ci
217362306a36Sopenharmony_ci		err = drbd_drain_block(peer_device, pi->size);
217462306a36Sopenharmony_ci
217562306a36Sopenharmony_ci		drbd_send_ack_dp(peer_device, P_NEG_ACK, p, pi->size);
217662306a36Sopenharmony_ci	}
217762306a36Sopenharmony_ci
217862306a36Sopenharmony_ci	atomic_add(pi->size >> 9, &device->rs_sect_in);
217962306a36Sopenharmony_ci
218062306a36Sopenharmony_ci	return err;
218162306a36Sopenharmony_ci}
218262306a36Sopenharmony_ci
218362306a36Sopenharmony_cistatic void restart_conflicting_writes(struct drbd_device *device,
218462306a36Sopenharmony_ci				       sector_t sector, int size)
218562306a36Sopenharmony_ci{
218662306a36Sopenharmony_ci	struct drbd_interval *i;
218762306a36Sopenharmony_ci	struct drbd_request *req;
218862306a36Sopenharmony_ci
218962306a36Sopenharmony_ci	drbd_for_each_overlap(i, &device->write_requests, sector, size) {
219062306a36Sopenharmony_ci		if (!i->local)
219162306a36Sopenharmony_ci			continue;
219262306a36Sopenharmony_ci		req = container_of(i, struct drbd_request, i);
219362306a36Sopenharmony_ci		if (req->rq_state & RQ_LOCAL_PENDING ||
219462306a36Sopenharmony_ci		    !(req->rq_state & RQ_POSTPONED))
219562306a36Sopenharmony_ci			continue;
219662306a36Sopenharmony_ci		/* as it is RQ_POSTPONED, this will cause it to
219762306a36Sopenharmony_ci		 * be queued on the retry workqueue. */
219862306a36Sopenharmony_ci		__req_mod(req, CONFLICT_RESOLVED, NULL, NULL);
219962306a36Sopenharmony_ci	}
220062306a36Sopenharmony_ci}
220162306a36Sopenharmony_ci
220262306a36Sopenharmony_ci/*
220362306a36Sopenharmony_ci * e_end_block() is called in ack_sender context via drbd_finish_peer_reqs().
220462306a36Sopenharmony_ci */
220562306a36Sopenharmony_cistatic int e_end_block(struct drbd_work *w, int cancel)
220662306a36Sopenharmony_ci{
220762306a36Sopenharmony_ci	struct drbd_peer_request *peer_req =
220862306a36Sopenharmony_ci		container_of(w, struct drbd_peer_request, w);
220962306a36Sopenharmony_ci	struct drbd_peer_device *peer_device = peer_req->peer_device;
221062306a36Sopenharmony_ci	struct drbd_device *device = peer_device->device;
221162306a36Sopenharmony_ci	sector_t sector = peer_req->i.sector;
221262306a36Sopenharmony_ci	int err = 0, pcmd;
221362306a36Sopenharmony_ci
221462306a36Sopenharmony_ci	if (peer_req->flags & EE_SEND_WRITE_ACK) {
221562306a36Sopenharmony_ci		if (likely((peer_req->flags & EE_WAS_ERROR) == 0)) {
221662306a36Sopenharmony_ci			pcmd = (device->state.conn >= C_SYNC_SOURCE &&
221762306a36Sopenharmony_ci				device->state.conn <= C_PAUSED_SYNC_T &&
221862306a36Sopenharmony_ci				peer_req->flags & EE_MAY_SET_IN_SYNC) ?
221962306a36Sopenharmony_ci				P_RS_WRITE_ACK : P_WRITE_ACK;
222062306a36Sopenharmony_ci			err = drbd_send_ack(peer_device, pcmd, peer_req);
222162306a36Sopenharmony_ci			if (pcmd == P_RS_WRITE_ACK)
222262306a36Sopenharmony_ci				drbd_set_in_sync(peer_device, sector, peer_req->i.size);
222362306a36Sopenharmony_ci		} else {
222462306a36Sopenharmony_ci			err = drbd_send_ack(peer_device, P_NEG_ACK, peer_req);
222562306a36Sopenharmony_ci			/* we expect it to be marked out of sync anyways...
222662306a36Sopenharmony_ci			 * maybe assert this?  */
222762306a36Sopenharmony_ci		}
222862306a36Sopenharmony_ci		dec_unacked(device);
222962306a36Sopenharmony_ci	}
223062306a36Sopenharmony_ci
223162306a36Sopenharmony_ci	/* we delete from the conflict detection hash _after_ we sent out the
223262306a36Sopenharmony_ci	 * P_WRITE_ACK / P_NEG_ACK, to get the sequence number right.  */
223362306a36Sopenharmony_ci	if (peer_req->flags & EE_IN_INTERVAL_TREE) {
223462306a36Sopenharmony_ci		spin_lock_irq(&device->resource->req_lock);
223562306a36Sopenharmony_ci		D_ASSERT(device, !drbd_interval_empty(&peer_req->i));
223662306a36Sopenharmony_ci		drbd_remove_epoch_entry_interval(device, peer_req);
223762306a36Sopenharmony_ci		if (peer_req->flags & EE_RESTART_REQUESTS)
223862306a36Sopenharmony_ci			restart_conflicting_writes(device, sector, peer_req->i.size);
223962306a36Sopenharmony_ci		spin_unlock_irq(&device->resource->req_lock);
224062306a36Sopenharmony_ci	} else
224162306a36Sopenharmony_ci		D_ASSERT(device, drbd_interval_empty(&peer_req->i));
224262306a36Sopenharmony_ci
224362306a36Sopenharmony_ci	drbd_may_finish_epoch(peer_device->connection, peer_req->epoch, EV_PUT + (cancel ? EV_CLEANUP : 0));
224462306a36Sopenharmony_ci
224562306a36Sopenharmony_ci	return err;
224662306a36Sopenharmony_ci}
224762306a36Sopenharmony_ci
224862306a36Sopenharmony_cistatic int e_send_ack(struct drbd_work *w, enum drbd_packet ack)
224962306a36Sopenharmony_ci{
225062306a36Sopenharmony_ci	struct drbd_peer_request *peer_req =
225162306a36Sopenharmony_ci		container_of(w, struct drbd_peer_request, w);
225262306a36Sopenharmony_ci	struct drbd_peer_device *peer_device = peer_req->peer_device;
225362306a36Sopenharmony_ci	int err;
225462306a36Sopenharmony_ci
225562306a36Sopenharmony_ci	err = drbd_send_ack(peer_device, ack, peer_req);
225662306a36Sopenharmony_ci	dec_unacked(peer_device->device);
225762306a36Sopenharmony_ci
225862306a36Sopenharmony_ci	return err;
225962306a36Sopenharmony_ci}
226062306a36Sopenharmony_ci
226162306a36Sopenharmony_cistatic int e_send_superseded(struct drbd_work *w, int unused)
226262306a36Sopenharmony_ci{
226362306a36Sopenharmony_ci	return e_send_ack(w, P_SUPERSEDED);
226462306a36Sopenharmony_ci}
226562306a36Sopenharmony_ci
226662306a36Sopenharmony_cistatic int e_send_retry_write(struct drbd_work *w, int unused)
226762306a36Sopenharmony_ci{
226862306a36Sopenharmony_ci	struct drbd_peer_request *peer_req =
226962306a36Sopenharmony_ci		container_of(w, struct drbd_peer_request, w);
227062306a36Sopenharmony_ci	struct drbd_connection *connection = peer_req->peer_device->connection;
227162306a36Sopenharmony_ci
227262306a36Sopenharmony_ci	return e_send_ack(w, connection->agreed_pro_version >= 100 ?
227362306a36Sopenharmony_ci			     P_RETRY_WRITE : P_SUPERSEDED);
227462306a36Sopenharmony_ci}
227562306a36Sopenharmony_ci
/*
 * seq_greater() - wrap-around aware "a is newer than b" for sequence numbers
 *
 * Returns true if a is ahead of b by less than half the 32-bit number space.
 */
static bool seq_greater(u32 a, u32 b)
{
	/*
	 * We assume 32-bit wrap-around here.
	 * For 24-bit wrap-around, we would have to shift:
	 *  a <<= 8; b <<= 8;
	 *
	 * The difference is taken in unsigned arithmetic, which is well
	 * defined modulo 2^32, and only then interpreted as signed.
	 * Subtracting the operands as s32 could overflow.
	 */
	return (s32)(a - b) > 0;
}
228562306a36Sopenharmony_ci
228662306a36Sopenharmony_cistatic u32 seq_max(u32 a, u32 b)
228762306a36Sopenharmony_ci{
228862306a36Sopenharmony_ci	return seq_greater(a, b) ? a : b;
228962306a36Sopenharmony_ci}
229062306a36Sopenharmony_ci
229162306a36Sopenharmony_cistatic void update_peer_seq(struct drbd_peer_device *peer_device, unsigned int peer_seq)
229262306a36Sopenharmony_ci{
229362306a36Sopenharmony_ci	struct drbd_device *device = peer_device->device;
229462306a36Sopenharmony_ci	unsigned int newest_peer_seq;
229562306a36Sopenharmony_ci
229662306a36Sopenharmony_ci	if (test_bit(RESOLVE_CONFLICTS, &peer_device->connection->flags)) {
229762306a36Sopenharmony_ci		spin_lock(&device->peer_seq_lock);
229862306a36Sopenharmony_ci		newest_peer_seq = seq_max(device->peer_seq, peer_seq);
229962306a36Sopenharmony_ci		device->peer_seq = newest_peer_seq;
230062306a36Sopenharmony_ci		spin_unlock(&device->peer_seq_lock);
230162306a36Sopenharmony_ci		/* wake up only if we actually changed device->peer_seq */
230262306a36Sopenharmony_ci		if (peer_seq == newest_peer_seq)
230362306a36Sopenharmony_ci			wake_up(&device->seq_wait);
230462306a36Sopenharmony_ci	}
230562306a36Sopenharmony_ci}
230662306a36Sopenharmony_ci
/*
 * Return 1 if the range starting at sector s1 with length l1 and the range
 * starting at sector s2 with length l2 share at least one sector, else 0.
 * The lengths are shifted right by 9 before being added to the start sectors.
 */
static inline int overlaps(sector_t s1, int l1, sector_t s2, int l2)
{
	const sector_t end1 = s1 + (l1 >> 9);
	const sector_t end2 = s2 + (l2 >> 9);

	return end1 > s2 && s1 < end2;
}
231162306a36Sopenharmony_ci
231262306a36Sopenharmony_ci/* maybe change sync_ee into interval trees as well? */
231362306a36Sopenharmony_cistatic bool overlapping_resync_write(struct drbd_device *device, struct drbd_peer_request *peer_req)
231462306a36Sopenharmony_ci{
231562306a36Sopenharmony_ci	struct drbd_peer_request *rs_req;
231662306a36Sopenharmony_ci	bool rv = false;
231762306a36Sopenharmony_ci
231862306a36Sopenharmony_ci	spin_lock_irq(&device->resource->req_lock);
231962306a36Sopenharmony_ci	list_for_each_entry(rs_req, &device->sync_ee, w.list) {
232062306a36Sopenharmony_ci		if (overlaps(peer_req->i.sector, peer_req->i.size,
232162306a36Sopenharmony_ci			     rs_req->i.sector, rs_req->i.size)) {
232262306a36Sopenharmony_ci			rv = true;
232362306a36Sopenharmony_ci			break;
232462306a36Sopenharmony_ci		}
232562306a36Sopenharmony_ci	}
232662306a36Sopenharmony_ci	spin_unlock_irq(&device->resource->req_lock);
232762306a36Sopenharmony_ci
232862306a36Sopenharmony_ci	return rv;
232962306a36Sopenharmony_ci}
233062306a36Sopenharmony_ci
233162306a36Sopenharmony_ci/* Called from receive_Data.
233262306a36Sopenharmony_ci * Synchronize packets on sock with packets on msock.
233362306a36Sopenharmony_ci *
233462306a36Sopenharmony_ci * This is here so even when a P_DATA packet traveling via sock overtook an Ack
233562306a36Sopenharmony_ci * packet traveling on msock, they are still processed in the order they have
233662306a36Sopenharmony_ci * been sent.
233762306a36Sopenharmony_ci *
233862306a36Sopenharmony_ci * Note: we don't care for Ack packets overtaking P_DATA packets.
233962306a36Sopenharmony_ci *
234062306a36Sopenharmony_ci * In case packet_seq is larger than device->peer_seq number, there are
234162306a36Sopenharmony_ci * outstanding packets on the msock. We wait for them to arrive.
234262306a36Sopenharmony_ci * In case we are the logically next packet, we update device->peer_seq
234362306a36Sopenharmony_ci * ourselves. Correctly handles 32bit wrap around.
234462306a36Sopenharmony_ci *
234562306a36Sopenharmony_ci * Assume we have a 10 GBit connection, that is about 1<<30 byte per second,
234662306a36Sopenharmony_ci * about 1<<21 sectors per second. So "worst" case, we have 1<<3 == 8 seconds
234762306a36Sopenharmony_ci * for the 24bit wrap (historical atomic_t guarantee on some archs), and we have
234862306a36Sopenharmony_ci * 1<<9 == 512 seconds aka ages for the 32bit wrap around...
234962306a36Sopenharmony_ci *
235062306a36Sopenharmony_ci * returns 0 if we may process the packet,
235162306a36Sopenharmony_ci * -ERESTARTSYS if we were interrupted (by disconnect signal). */
235262306a36Sopenharmony_cistatic int wait_for_and_update_peer_seq(struct drbd_peer_device *peer_device, const u32 peer_seq)
235362306a36Sopenharmony_ci{
235462306a36Sopenharmony_ci	struct drbd_device *device = peer_device->device;
235562306a36Sopenharmony_ci	DEFINE_WAIT(wait);
235662306a36Sopenharmony_ci	long timeout;
235762306a36Sopenharmony_ci	int ret = 0, tp;
235862306a36Sopenharmony_ci
235962306a36Sopenharmony_ci	if (!test_bit(RESOLVE_CONFLICTS, &peer_device->connection->flags))
236062306a36Sopenharmony_ci		return 0;
236162306a36Sopenharmony_ci
236262306a36Sopenharmony_ci	spin_lock(&device->peer_seq_lock);
236362306a36Sopenharmony_ci	for (;;) {
236462306a36Sopenharmony_ci		if (!seq_greater(peer_seq - 1, device->peer_seq)) {
236562306a36Sopenharmony_ci			device->peer_seq = seq_max(device->peer_seq, peer_seq);
236662306a36Sopenharmony_ci			break;
236762306a36Sopenharmony_ci		}
236862306a36Sopenharmony_ci
236962306a36Sopenharmony_ci		if (signal_pending(current)) {
237062306a36Sopenharmony_ci			ret = -ERESTARTSYS;
237162306a36Sopenharmony_ci			break;
237262306a36Sopenharmony_ci		}
237362306a36Sopenharmony_ci
237462306a36Sopenharmony_ci		rcu_read_lock();
237562306a36Sopenharmony_ci		tp = rcu_dereference(peer_device->connection->net_conf)->two_primaries;
237662306a36Sopenharmony_ci		rcu_read_unlock();
237762306a36Sopenharmony_ci
237862306a36Sopenharmony_ci		if (!tp)
237962306a36Sopenharmony_ci			break;
238062306a36Sopenharmony_ci
238162306a36Sopenharmony_ci		/* Only need to wait if two_primaries is enabled */
238262306a36Sopenharmony_ci		prepare_to_wait(&device->seq_wait, &wait, TASK_INTERRUPTIBLE);
238362306a36Sopenharmony_ci		spin_unlock(&device->peer_seq_lock);
238462306a36Sopenharmony_ci		rcu_read_lock();
238562306a36Sopenharmony_ci		timeout = rcu_dereference(peer_device->connection->net_conf)->ping_timeo*HZ/10;
238662306a36Sopenharmony_ci		rcu_read_unlock();
238762306a36Sopenharmony_ci		timeout = schedule_timeout(timeout);
238862306a36Sopenharmony_ci		spin_lock(&device->peer_seq_lock);
238962306a36Sopenharmony_ci		if (!timeout) {
239062306a36Sopenharmony_ci			ret = -ETIMEDOUT;
239162306a36Sopenharmony_ci			drbd_err(device, "Timed out waiting for missing ack packets; disconnecting\n");
239262306a36Sopenharmony_ci			break;
239362306a36Sopenharmony_ci		}
239462306a36Sopenharmony_ci	}
239562306a36Sopenharmony_ci	spin_unlock(&device->peer_seq_lock);
239662306a36Sopenharmony_ci	finish_wait(&device->seq_wait, &wait);
239762306a36Sopenharmony_ci	return ret;
239862306a36Sopenharmony_ci}
239962306a36Sopenharmony_ci
240062306a36Sopenharmony_cistatic enum req_op wire_flags_to_bio_op(u32 dpf)
240162306a36Sopenharmony_ci{
240262306a36Sopenharmony_ci	if (dpf & DP_ZEROES)
240362306a36Sopenharmony_ci		return REQ_OP_WRITE_ZEROES;
240462306a36Sopenharmony_ci	if (dpf & DP_DISCARD)
240562306a36Sopenharmony_ci		return REQ_OP_DISCARD;
240662306a36Sopenharmony_ci	else
240762306a36Sopenharmony_ci		return REQ_OP_WRITE;
240862306a36Sopenharmony_ci}
240962306a36Sopenharmony_ci
241062306a36Sopenharmony_ci/* see also bio_flags_to_wire() */
241162306a36Sopenharmony_cistatic blk_opf_t wire_flags_to_bio(struct drbd_connection *connection, u32 dpf)
241262306a36Sopenharmony_ci{
241362306a36Sopenharmony_ci	return wire_flags_to_bio_op(dpf) |
241462306a36Sopenharmony_ci		(dpf & DP_RW_SYNC ? REQ_SYNC : 0) |
241562306a36Sopenharmony_ci		(dpf & DP_FUA ? REQ_FUA : 0) |
241662306a36Sopenharmony_ci		(dpf & DP_FLUSH ? REQ_PREFLUSH : 0);
241762306a36Sopenharmony_ci}
241862306a36Sopenharmony_ci
241962306a36Sopenharmony_cistatic void fail_postponed_requests(struct drbd_device *device, sector_t sector,
242062306a36Sopenharmony_ci				    unsigned int size)
242162306a36Sopenharmony_ci{
242262306a36Sopenharmony_ci	struct drbd_peer_device *peer_device = first_peer_device(device);
242362306a36Sopenharmony_ci	struct drbd_interval *i;
242462306a36Sopenharmony_ci
242562306a36Sopenharmony_ci    repeat:
242662306a36Sopenharmony_ci	drbd_for_each_overlap(i, &device->write_requests, sector, size) {
242762306a36Sopenharmony_ci		struct drbd_request *req;
242862306a36Sopenharmony_ci		struct bio_and_error m;
242962306a36Sopenharmony_ci
243062306a36Sopenharmony_ci		if (!i->local)
243162306a36Sopenharmony_ci			continue;
243262306a36Sopenharmony_ci		req = container_of(i, struct drbd_request, i);
243362306a36Sopenharmony_ci		if (!(req->rq_state & RQ_POSTPONED))
243462306a36Sopenharmony_ci			continue;
243562306a36Sopenharmony_ci		req->rq_state &= ~RQ_POSTPONED;
243662306a36Sopenharmony_ci		__req_mod(req, NEG_ACKED, peer_device, &m);
243762306a36Sopenharmony_ci		spin_unlock_irq(&device->resource->req_lock);
243862306a36Sopenharmony_ci		if (m.bio)
243962306a36Sopenharmony_ci			complete_master_bio(device, &m);
244062306a36Sopenharmony_ci		spin_lock_irq(&device->resource->req_lock);
244162306a36Sopenharmony_ci		goto repeat;
244262306a36Sopenharmony_ci	}
244362306a36Sopenharmony_ci}
244462306a36Sopenharmony_ci
244562306a36Sopenharmony_cistatic int handle_write_conflicts(struct drbd_device *device,
244662306a36Sopenharmony_ci				  struct drbd_peer_request *peer_req)
244762306a36Sopenharmony_ci{
244862306a36Sopenharmony_ci	struct drbd_connection *connection = peer_req->peer_device->connection;
244962306a36Sopenharmony_ci	bool resolve_conflicts = test_bit(RESOLVE_CONFLICTS, &connection->flags);
245062306a36Sopenharmony_ci	sector_t sector = peer_req->i.sector;
245162306a36Sopenharmony_ci	const unsigned int size = peer_req->i.size;
245262306a36Sopenharmony_ci	struct drbd_interval *i;
245362306a36Sopenharmony_ci	bool equal;
245462306a36Sopenharmony_ci	int err;
245562306a36Sopenharmony_ci
245662306a36Sopenharmony_ci	/*
245762306a36Sopenharmony_ci	 * Inserting the peer request into the write_requests tree will prevent
245862306a36Sopenharmony_ci	 * new conflicting local requests from being added.
245962306a36Sopenharmony_ci	 */
246062306a36Sopenharmony_ci	drbd_insert_interval(&device->write_requests, &peer_req->i);
246162306a36Sopenharmony_ci
246262306a36Sopenharmony_ci    repeat:
246362306a36Sopenharmony_ci	drbd_for_each_overlap(i, &device->write_requests, sector, size) {
246462306a36Sopenharmony_ci		if (i == &peer_req->i)
246562306a36Sopenharmony_ci			continue;
246662306a36Sopenharmony_ci		if (i->completed)
246762306a36Sopenharmony_ci			continue;
246862306a36Sopenharmony_ci
246962306a36Sopenharmony_ci		if (!i->local) {
247062306a36Sopenharmony_ci			/*
247162306a36Sopenharmony_ci			 * Our peer has sent a conflicting remote request; this
247262306a36Sopenharmony_ci			 * should not happen in a two-node setup.  Wait for the
247362306a36Sopenharmony_ci			 * earlier peer request to complete.
247462306a36Sopenharmony_ci			 */
247562306a36Sopenharmony_ci			err = drbd_wait_misc(device, i);
247662306a36Sopenharmony_ci			if (err)
247762306a36Sopenharmony_ci				goto out;
247862306a36Sopenharmony_ci			goto repeat;
247962306a36Sopenharmony_ci		}
248062306a36Sopenharmony_ci
248162306a36Sopenharmony_ci		equal = i->sector == sector && i->size == size;
248262306a36Sopenharmony_ci		if (resolve_conflicts) {
248362306a36Sopenharmony_ci			/*
248462306a36Sopenharmony_ci			 * If the peer request is fully contained within the
248562306a36Sopenharmony_ci			 * overlapping request, it can be considered overwritten
248662306a36Sopenharmony_ci			 * and thus superseded; otherwise, it will be retried
248762306a36Sopenharmony_ci			 * once all overlapping requests have completed.
248862306a36Sopenharmony_ci			 */
248962306a36Sopenharmony_ci			bool superseded = i->sector <= sector && i->sector +
249062306a36Sopenharmony_ci				       (i->size >> 9) >= sector + (size >> 9);
249162306a36Sopenharmony_ci
249262306a36Sopenharmony_ci			if (!equal)
249362306a36Sopenharmony_ci				drbd_alert(device, "Concurrent writes detected: "
249462306a36Sopenharmony_ci					       "local=%llus +%u, remote=%llus +%u, "
249562306a36Sopenharmony_ci					       "assuming %s came first\n",
249662306a36Sopenharmony_ci					  (unsigned long long)i->sector, i->size,
249762306a36Sopenharmony_ci					  (unsigned long long)sector, size,
249862306a36Sopenharmony_ci					  superseded ? "local" : "remote");
249962306a36Sopenharmony_ci
250062306a36Sopenharmony_ci			peer_req->w.cb = superseded ? e_send_superseded :
250162306a36Sopenharmony_ci						   e_send_retry_write;
250262306a36Sopenharmony_ci			list_add_tail(&peer_req->w.list, &device->done_ee);
250362306a36Sopenharmony_ci			queue_work(connection->ack_sender, &peer_req->peer_device->send_acks_work);
250462306a36Sopenharmony_ci
250562306a36Sopenharmony_ci			err = -ENOENT;
250662306a36Sopenharmony_ci			goto out;
250762306a36Sopenharmony_ci		} else {
250862306a36Sopenharmony_ci			struct drbd_request *req =
250962306a36Sopenharmony_ci				container_of(i, struct drbd_request, i);
251062306a36Sopenharmony_ci
251162306a36Sopenharmony_ci			if (!equal)
251262306a36Sopenharmony_ci				drbd_alert(device, "Concurrent writes detected: "
251362306a36Sopenharmony_ci					       "local=%llus +%u, remote=%llus +%u\n",
251462306a36Sopenharmony_ci					  (unsigned long long)i->sector, i->size,
251562306a36Sopenharmony_ci					  (unsigned long long)sector, size);
251662306a36Sopenharmony_ci
251762306a36Sopenharmony_ci			if (req->rq_state & RQ_LOCAL_PENDING ||
251862306a36Sopenharmony_ci			    !(req->rq_state & RQ_POSTPONED)) {
251962306a36Sopenharmony_ci				/*
252062306a36Sopenharmony_ci				 * Wait for the node with the discard flag to
252162306a36Sopenharmony_ci				 * decide if this request has been superseded
252262306a36Sopenharmony_ci				 * or needs to be retried.
252362306a36Sopenharmony_ci				 * Requests that have been superseded will
252462306a36Sopenharmony_ci				 * disappear from the write_requests tree.
252562306a36Sopenharmony_ci				 *
252662306a36Sopenharmony_ci				 * In addition, wait for the conflicting
252762306a36Sopenharmony_ci				 * request to finish locally before submitting
252862306a36Sopenharmony_ci				 * the conflicting peer request.
252962306a36Sopenharmony_ci				 */
253062306a36Sopenharmony_ci				err = drbd_wait_misc(device, &req->i);
253162306a36Sopenharmony_ci				if (err) {
253262306a36Sopenharmony_ci					_conn_request_state(connection, NS(conn, C_TIMEOUT), CS_HARD);
253362306a36Sopenharmony_ci					fail_postponed_requests(device, sector, size);
253462306a36Sopenharmony_ci					goto out;
253562306a36Sopenharmony_ci				}
253662306a36Sopenharmony_ci				goto repeat;
253762306a36Sopenharmony_ci			}
253862306a36Sopenharmony_ci			/*
253962306a36Sopenharmony_ci			 * Remember to restart the conflicting requests after
254062306a36Sopenharmony_ci			 * the new peer request has completed.
254162306a36Sopenharmony_ci			 */
254262306a36Sopenharmony_ci			peer_req->flags |= EE_RESTART_REQUESTS;
254362306a36Sopenharmony_ci		}
254462306a36Sopenharmony_ci	}
254562306a36Sopenharmony_ci	err = 0;
254662306a36Sopenharmony_ci
254762306a36Sopenharmony_ci    out:
254862306a36Sopenharmony_ci	if (err)
254962306a36Sopenharmony_ci		drbd_remove_epoch_entry_interval(device, peer_req);
255062306a36Sopenharmony_ci	return err;
255162306a36Sopenharmony_ci}
255262306a36Sopenharmony_ci
255362306a36Sopenharmony_ci/* mirrored write */
255462306a36Sopenharmony_cistatic int receive_Data(struct drbd_connection *connection, struct packet_info *pi)
255562306a36Sopenharmony_ci{
255662306a36Sopenharmony_ci	struct drbd_peer_device *peer_device;
255762306a36Sopenharmony_ci	struct drbd_device *device;
255862306a36Sopenharmony_ci	struct net_conf *nc;
255962306a36Sopenharmony_ci	sector_t sector;
256062306a36Sopenharmony_ci	struct drbd_peer_request *peer_req;
256162306a36Sopenharmony_ci	struct p_data *p = pi->data;
256262306a36Sopenharmony_ci	u32 peer_seq = be32_to_cpu(p->seq_num);
256362306a36Sopenharmony_ci	u32 dp_flags;
256462306a36Sopenharmony_ci	int err, tp;
256562306a36Sopenharmony_ci
256662306a36Sopenharmony_ci	peer_device = conn_peer_device(connection, pi->vnr);
256762306a36Sopenharmony_ci	if (!peer_device)
256862306a36Sopenharmony_ci		return -EIO;
256962306a36Sopenharmony_ci	device = peer_device->device;
257062306a36Sopenharmony_ci
257162306a36Sopenharmony_ci	if (!get_ldev(device)) {
257262306a36Sopenharmony_ci		int err2;
257362306a36Sopenharmony_ci
257462306a36Sopenharmony_ci		err = wait_for_and_update_peer_seq(peer_device, peer_seq);
257562306a36Sopenharmony_ci		drbd_send_ack_dp(peer_device, P_NEG_ACK, p, pi->size);
257662306a36Sopenharmony_ci		atomic_inc(&connection->current_epoch->epoch_size);
257762306a36Sopenharmony_ci		err2 = drbd_drain_block(peer_device, pi->size);
257862306a36Sopenharmony_ci		if (!err)
257962306a36Sopenharmony_ci			err = err2;
258062306a36Sopenharmony_ci		return err;
258162306a36Sopenharmony_ci	}
258262306a36Sopenharmony_ci
258362306a36Sopenharmony_ci	/*
258462306a36Sopenharmony_ci	 * Corresponding put_ldev done either below (on various errors), or in
258562306a36Sopenharmony_ci	 * drbd_peer_request_endio, if we successfully submit the data at the
258662306a36Sopenharmony_ci	 * end of this function.
258762306a36Sopenharmony_ci	 */
258862306a36Sopenharmony_ci
258962306a36Sopenharmony_ci	sector = be64_to_cpu(p->sector);
259062306a36Sopenharmony_ci	peer_req = read_in_block(peer_device, p->block_id, sector, pi);
259162306a36Sopenharmony_ci	if (!peer_req) {
259262306a36Sopenharmony_ci		put_ldev(device);
259362306a36Sopenharmony_ci		return -EIO;
259462306a36Sopenharmony_ci	}
259562306a36Sopenharmony_ci
259662306a36Sopenharmony_ci	peer_req->w.cb = e_end_block;
259762306a36Sopenharmony_ci	peer_req->submit_jif = jiffies;
259862306a36Sopenharmony_ci	peer_req->flags |= EE_APPLICATION;
259962306a36Sopenharmony_ci
260062306a36Sopenharmony_ci	dp_flags = be32_to_cpu(p->dp_flags);
260162306a36Sopenharmony_ci	peer_req->opf = wire_flags_to_bio(connection, dp_flags);
260262306a36Sopenharmony_ci	if (pi->cmd == P_TRIM) {
260362306a36Sopenharmony_ci		D_ASSERT(peer_device, peer_req->i.size > 0);
260462306a36Sopenharmony_ci		D_ASSERT(peer_device, peer_req_op(peer_req) == REQ_OP_DISCARD);
260562306a36Sopenharmony_ci		D_ASSERT(peer_device, peer_req->pages == NULL);
260662306a36Sopenharmony_ci		/* need to play safe: an older DRBD sender
260762306a36Sopenharmony_ci		 * may mean zero-out while sending P_TRIM. */
260862306a36Sopenharmony_ci		if (0 == (connection->agreed_features & DRBD_FF_WZEROES))
260962306a36Sopenharmony_ci			peer_req->flags |= EE_ZEROOUT;
261062306a36Sopenharmony_ci	} else if (pi->cmd == P_ZEROES) {
261162306a36Sopenharmony_ci		D_ASSERT(peer_device, peer_req->i.size > 0);
261262306a36Sopenharmony_ci		D_ASSERT(peer_device, peer_req_op(peer_req) == REQ_OP_WRITE_ZEROES);
261362306a36Sopenharmony_ci		D_ASSERT(peer_device, peer_req->pages == NULL);
261462306a36Sopenharmony_ci		/* Do (not) pass down BLKDEV_ZERO_NOUNMAP? */
261562306a36Sopenharmony_ci		if (dp_flags & DP_DISCARD)
261662306a36Sopenharmony_ci			peer_req->flags |= EE_TRIM;
261762306a36Sopenharmony_ci	} else if (peer_req->pages == NULL) {
261862306a36Sopenharmony_ci		D_ASSERT(device, peer_req->i.size == 0);
261962306a36Sopenharmony_ci		D_ASSERT(device, dp_flags & DP_FLUSH);
262062306a36Sopenharmony_ci	}
262162306a36Sopenharmony_ci
262262306a36Sopenharmony_ci	if (dp_flags & DP_MAY_SET_IN_SYNC)
262362306a36Sopenharmony_ci		peer_req->flags |= EE_MAY_SET_IN_SYNC;
262462306a36Sopenharmony_ci
262562306a36Sopenharmony_ci	spin_lock(&connection->epoch_lock);
262662306a36Sopenharmony_ci	peer_req->epoch = connection->current_epoch;
262762306a36Sopenharmony_ci	atomic_inc(&peer_req->epoch->epoch_size);
262862306a36Sopenharmony_ci	atomic_inc(&peer_req->epoch->active);
262962306a36Sopenharmony_ci	spin_unlock(&connection->epoch_lock);
263062306a36Sopenharmony_ci
263162306a36Sopenharmony_ci	rcu_read_lock();
263262306a36Sopenharmony_ci	nc = rcu_dereference(peer_device->connection->net_conf);
263362306a36Sopenharmony_ci	tp = nc->two_primaries;
263462306a36Sopenharmony_ci	if (peer_device->connection->agreed_pro_version < 100) {
263562306a36Sopenharmony_ci		switch (nc->wire_protocol) {
263662306a36Sopenharmony_ci		case DRBD_PROT_C:
263762306a36Sopenharmony_ci			dp_flags |= DP_SEND_WRITE_ACK;
263862306a36Sopenharmony_ci			break;
263962306a36Sopenharmony_ci		case DRBD_PROT_B:
264062306a36Sopenharmony_ci			dp_flags |= DP_SEND_RECEIVE_ACK;
264162306a36Sopenharmony_ci			break;
264262306a36Sopenharmony_ci		}
264362306a36Sopenharmony_ci	}
264462306a36Sopenharmony_ci	rcu_read_unlock();
264562306a36Sopenharmony_ci
264662306a36Sopenharmony_ci	if (dp_flags & DP_SEND_WRITE_ACK) {
264762306a36Sopenharmony_ci		peer_req->flags |= EE_SEND_WRITE_ACK;
264862306a36Sopenharmony_ci		inc_unacked(device);
264962306a36Sopenharmony_ci		/* corresponding dec_unacked() in e_end_block()
265062306a36Sopenharmony_ci		 * respective _drbd_clear_done_ee */
265162306a36Sopenharmony_ci	}
265262306a36Sopenharmony_ci
265362306a36Sopenharmony_ci	if (dp_flags & DP_SEND_RECEIVE_ACK) {
265462306a36Sopenharmony_ci		/* I really don't like it that the receiver thread
265562306a36Sopenharmony_ci		 * sends on the msock, but anyways */
265662306a36Sopenharmony_ci		drbd_send_ack(peer_device, P_RECV_ACK, peer_req);
265762306a36Sopenharmony_ci	}
265862306a36Sopenharmony_ci
265962306a36Sopenharmony_ci	if (tp) {
266062306a36Sopenharmony_ci		/* two primaries implies protocol C */
266162306a36Sopenharmony_ci		D_ASSERT(device, dp_flags & DP_SEND_WRITE_ACK);
266262306a36Sopenharmony_ci		peer_req->flags |= EE_IN_INTERVAL_TREE;
266362306a36Sopenharmony_ci		err = wait_for_and_update_peer_seq(peer_device, peer_seq);
266462306a36Sopenharmony_ci		if (err)
266562306a36Sopenharmony_ci			goto out_interrupted;
266662306a36Sopenharmony_ci		spin_lock_irq(&device->resource->req_lock);
266762306a36Sopenharmony_ci		err = handle_write_conflicts(device, peer_req);
266862306a36Sopenharmony_ci		if (err) {
266962306a36Sopenharmony_ci			spin_unlock_irq(&device->resource->req_lock);
267062306a36Sopenharmony_ci			if (err == -ENOENT) {
267162306a36Sopenharmony_ci				put_ldev(device);
267262306a36Sopenharmony_ci				return 0;
267362306a36Sopenharmony_ci			}
267462306a36Sopenharmony_ci			goto out_interrupted;
267562306a36Sopenharmony_ci		}
267662306a36Sopenharmony_ci	} else {
267762306a36Sopenharmony_ci		update_peer_seq(peer_device, peer_seq);
267862306a36Sopenharmony_ci		spin_lock_irq(&device->resource->req_lock);
267962306a36Sopenharmony_ci	}
268062306a36Sopenharmony_ci	/* TRIM and is processed synchronously,
268162306a36Sopenharmony_ci	 * we wait for all pending requests, respectively wait for
268262306a36Sopenharmony_ci	 * active_ee to become empty in drbd_submit_peer_request();
268362306a36Sopenharmony_ci	 * better not add ourselves here. */
268462306a36Sopenharmony_ci	if ((peer_req->flags & (EE_TRIM | EE_ZEROOUT)) == 0)
268562306a36Sopenharmony_ci		list_add_tail(&peer_req->w.list, &device->active_ee);
268662306a36Sopenharmony_ci	spin_unlock_irq(&device->resource->req_lock);
268762306a36Sopenharmony_ci
268862306a36Sopenharmony_ci	if (device->state.conn == C_SYNC_TARGET)
268962306a36Sopenharmony_ci		wait_event(device->ee_wait, !overlapping_resync_write(device, peer_req));
269062306a36Sopenharmony_ci
269162306a36Sopenharmony_ci	if (device->state.pdsk < D_INCONSISTENT) {
269262306a36Sopenharmony_ci		/* In case we have the only disk of the cluster, */
269362306a36Sopenharmony_ci		drbd_set_out_of_sync(peer_device, peer_req->i.sector, peer_req->i.size);
269462306a36Sopenharmony_ci		peer_req->flags &= ~EE_MAY_SET_IN_SYNC;
269562306a36Sopenharmony_ci		drbd_al_begin_io(device, &peer_req->i);
269662306a36Sopenharmony_ci		peer_req->flags |= EE_CALL_AL_COMPLETE_IO;
269762306a36Sopenharmony_ci	}
269862306a36Sopenharmony_ci
269962306a36Sopenharmony_ci	err = drbd_submit_peer_request(peer_req);
270062306a36Sopenharmony_ci	if (!err)
270162306a36Sopenharmony_ci		return 0;
270262306a36Sopenharmony_ci
270362306a36Sopenharmony_ci	/* don't care for the reason here */
270462306a36Sopenharmony_ci	drbd_err(device, "submit failed, triggering re-connect\n");
270562306a36Sopenharmony_ci	spin_lock_irq(&device->resource->req_lock);
270662306a36Sopenharmony_ci	list_del(&peer_req->w.list);
270762306a36Sopenharmony_ci	drbd_remove_epoch_entry_interval(device, peer_req);
270862306a36Sopenharmony_ci	spin_unlock_irq(&device->resource->req_lock);
270962306a36Sopenharmony_ci	if (peer_req->flags & EE_CALL_AL_COMPLETE_IO) {
271062306a36Sopenharmony_ci		peer_req->flags &= ~EE_CALL_AL_COMPLETE_IO;
271162306a36Sopenharmony_ci		drbd_al_complete_io(device, &peer_req->i);
271262306a36Sopenharmony_ci	}
271362306a36Sopenharmony_ci
271462306a36Sopenharmony_ciout_interrupted:
271562306a36Sopenharmony_ci	drbd_may_finish_epoch(connection, peer_req->epoch, EV_PUT | EV_CLEANUP);
271662306a36Sopenharmony_ci	put_ldev(device);
271762306a36Sopenharmony_ci	drbd_free_peer_req(device, peer_req);
271862306a36Sopenharmony_ci	return err;
271962306a36Sopenharmony_ci}
272062306a36Sopenharmony_ci
272162306a36Sopenharmony_ci/* We may throttle resync, if the lower device seems to be busy,
272262306a36Sopenharmony_ci * and current sync rate is above c_min_rate.
272362306a36Sopenharmony_ci *
272462306a36Sopenharmony_ci * To decide whether or not the lower device is busy, we use a scheme similar
272562306a36Sopenharmony_ci * to MD RAID is_mddev_idle(): if the partition stats reveal "significant"
272662306a36Sopenharmony_ci * (more than 64 sectors) of activity we cannot account for with our own resync
272762306a36Sopenharmony_ci * activity, it obviously is "busy".
272862306a36Sopenharmony_ci *
272962306a36Sopenharmony_ci * The current sync rate used here uses only the most recent two step marks,
273062306a36Sopenharmony_ci * to have a short time average so we can react faster.
273162306a36Sopenharmony_ci */
273262306a36Sopenharmony_cibool drbd_rs_should_slow_down(struct drbd_peer_device *peer_device, sector_t sector,
273362306a36Sopenharmony_ci		bool throttle_if_app_is_waiting)
273462306a36Sopenharmony_ci{
273562306a36Sopenharmony_ci	struct drbd_device *device = peer_device->device;
273662306a36Sopenharmony_ci	struct lc_element *tmp;
273762306a36Sopenharmony_ci	bool throttle = drbd_rs_c_min_rate_throttle(device);
273862306a36Sopenharmony_ci
273962306a36Sopenharmony_ci	if (!throttle || throttle_if_app_is_waiting)
274062306a36Sopenharmony_ci		return throttle;
274162306a36Sopenharmony_ci
274262306a36Sopenharmony_ci	spin_lock_irq(&device->al_lock);
274362306a36Sopenharmony_ci	tmp = lc_find(device->resync, BM_SECT_TO_EXT(sector));
274462306a36Sopenharmony_ci	if (tmp) {
274562306a36Sopenharmony_ci		struct bm_extent *bm_ext = lc_entry(tmp, struct bm_extent, lce);
274662306a36Sopenharmony_ci		if (test_bit(BME_PRIORITY, &bm_ext->flags))
274762306a36Sopenharmony_ci			throttle = false;
274862306a36Sopenharmony_ci		/* Do not slow down if app IO is already waiting for this extent,
274962306a36Sopenharmony_ci		 * and our progress is necessary for application IO to complete. */
275062306a36Sopenharmony_ci	}
275162306a36Sopenharmony_ci	spin_unlock_irq(&device->al_lock);
275262306a36Sopenharmony_ci
275362306a36Sopenharmony_ci	return throttle;
275462306a36Sopenharmony_ci}
275562306a36Sopenharmony_ci
275662306a36Sopenharmony_cibool drbd_rs_c_min_rate_throttle(struct drbd_device *device)
275762306a36Sopenharmony_ci{
275862306a36Sopenharmony_ci	struct gendisk *disk = device->ldev->backing_bdev->bd_disk;
275962306a36Sopenharmony_ci	unsigned long db, dt, dbdt;
276062306a36Sopenharmony_ci	unsigned int c_min_rate;
276162306a36Sopenharmony_ci	int curr_events;
276262306a36Sopenharmony_ci
276362306a36Sopenharmony_ci	rcu_read_lock();
276462306a36Sopenharmony_ci	c_min_rate = rcu_dereference(device->ldev->disk_conf)->c_min_rate;
276562306a36Sopenharmony_ci	rcu_read_unlock();
276662306a36Sopenharmony_ci
276762306a36Sopenharmony_ci	/* feature disabled? */
276862306a36Sopenharmony_ci	if (c_min_rate == 0)
276962306a36Sopenharmony_ci		return false;
277062306a36Sopenharmony_ci
277162306a36Sopenharmony_ci	curr_events = (int)part_stat_read_accum(disk->part0, sectors) -
277262306a36Sopenharmony_ci			atomic_read(&device->rs_sect_ev);
277362306a36Sopenharmony_ci
277462306a36Sopenharmony_ci	if (atomic_read(&device->ap_actlog_cnt)
277562306a36Sopenharmony_ci	    || curr_events - device->rs_last_events > 64) {
277662306a36Sopenharmony_ci		unsigned long rs_left;
277762306a36Sopenharmony_ci		int i;
277862306a36Sopenharmony_ci
277962306a36Sopenharmony_ci		device->rs_last_events = curr_events;
278062306a36Sopenharmony_ci
278162306a36Sopenharmony_ci		/* sync speed average over the last 2*DRBD_SYNC_MARK_STEP,
278262306a36Sopenharmony_ci		 * approx. */
278362306a36Sopenharmony_ci		i = (device->rs_last_mark + DRBD_SYNC_MARKS-1) % DRBD_SYNC_MARKS;
278462306a36Sopenharmony_ci
278562306a36Sopenharmony_ci		if (device->state.conn == C_VERIFY_S || device->state.conn == C_VERIFY_T)
278662306a36Sopenharmony_ci			rs_left = device->ov_left;
278762306a36Sopenharmony_ci		else
278862306a36Sopenharmony_ci			rs_left = drbd_bm_total_weight(device) - device->rs_failed;
278962306a36Sopenharmony_ci
279062306a36Sopenharmony_ci		dt = ((long)jiffies - (long)device->rs_mark_time[i]) / HZ;
279162306a36Sopenharmony_ci		if (!dt)
279262306a36Sopenharmony_ci			dt++;
279362306a36Sopenharmony_ci		db = device->rs_mark_left[i] - rs_left;
279462306a36Sopenharmony_ci		dbdt = Bit2KB(db/dt);
279562306a36Sopenharmony_ci
279662306a36Sopenharmony_ci		if (dbdt > c_min_rate)
279762306a36Sopenharmony_ci			return true;
279862306a36Sopenharmony_ci	}
279962306a36Sopenharmony_ci	return false;
280062306a36Sopenharmony_ci}
280162306a36Sopenharmony_ci
280262306a36Sopenharmony_cistatic int receive_DataRequest(struct drbd_connection *connection, struct packet_info *pi)
280362306a36Sopenharmony_ci{
280462306a36Sopenharmony_ci	struct drbd_peer_device *peer_device;
280562306a36Sopenharmony_ci	struct drbd_device *device;
280662306a36Sopenharmony_ci	sector_t sector;
280762306a36Sopenharmony_ci	sector_t capacity;
280862306a36Sopenharmony_ci	struct drbd_peer_request *peer_req;
280962306a36Sopenharmony_ci	struct digest_info *di = NULL;
281062306a36Sopenharmony_ci	int size, verb;
281162306a36Sopenharmony_ci	struct p_block_req *p =	pi->data;
281262306a36Sopenharmony_ci
281362306a36Sopenharmony_ci	peer_device = conn_peer_device(connection, pi->vnr);
281462306a36Sopenharmony_ci	if (!peer_device)
281562306a36Sopenharmony_ci		return -EIO;
281662306a36Sopenharmony_ci	device = peer_device->device;
281762306a36Sopenharmony_ci	capacity = get_capacity(device->vdisk);
281862306a36Sopenharmony_ci
281962306a36Sopenharmony_ci	sector = be64_to_cpu(p->sector);
282062306a36Sopenharmony_ci	size   = be32_to_cpu(p->blksize);
282162306a36Sopenharmony_ci
282262306a36Sopenharmony_ci	if (size <= 0 || !IS_ALIGNED(size, 512) || size > DRBD_MAX_BIO_SIZE) {
282362306a36Sopenharmony_ci		drbd_err(device, "%s:%d: sector: %llus, size: %u\n", __FILE__, __LINE__,
282462306a36Sopenharmony_ci				(unsigned long long)sector, size);
282562306a36Sopenharmony_ci		return -EINVAL;
282662306a36Sopenharmony_ci	}
282762306a36Sopenharmony_ci	if (sector + (size>>9) > capacity) {
282862306a36Sopenharmony_ci		drbd_err(device, "%s:%d: sector: %llus, size: %u\n", __FILE__, __LINE__,
282962306a36Sopenharmony_ci				(unsigned long long)sector, size);
283062306a36Sopenharmony_ci		return -EINVAL;
283162306a36Sopenharmony_ci	}
283262306a36Sopenharmony_ci
283362306a36Sopenharmony_ci	if (!get_ldev_if_state(device, D_UP_TO_DATE)) {
283462306a36Sopenharmony_ci		verb = 1;
283562306a36Sopenharmony_ci		switch (pi->cmd) {
283662306a36Sopenharmony_ci		case P_DATA_REQUEST:
283762306a36Sopenharmony_ci			drbd_send_ack_rp(peer_device, P_NEG_DREPLY, p);
283862306a36Sopenharmony_ci			break;
283962306a36Sopenharmony_ci		case P_RS_THIN_REQ:
284062306a36Sopenharmony_ci		case P_RS_DATA_REQUEST:
284162306a36Sopenharmony_ci		case P_CSUM_RS_REQUEST:
284262306a36Sopenharmony_ci		case P_OV_REQUEST:
284362306a36Sopenharmony_ci			drbd_send_ack_rp(peer_device, P_NEG_RS_DREPLY , p);
284462306a36Sopenharmony_ci			break;
284562306a36Sopenharmony_ci		case P_OV_REPLY:
284662306a36Sopenharmony_ci			verb = 0;
284762306a36Sopenharmony_ci			dec_rs_pending(peer_device);
284862306a36Sopenharmony_ci			drbd_send_ack_ex(peer_device, P_OV_RESULT, sector, size, ID_IN_SYNC);
284962306a36Sopenharmony_ci			break;
285062306a36Sopenharmony_ci		default:
285162306a36Sopenharmony_ci			BUG();
285262306a36Sopenharmony_ci		}
285362306a36Sopenharmony_ci		if (verb && drbd_ratelimit())
285462306a36Sopenharmony_ci			drbd_err(device, "Can not satisfy peer's read request, "
285562306a36Sopenharmony_ci			    "no local data.\n");
285662306a36Sopenharmony_ci
285762306a36Sopenharmony_ci		/* drain possibly payload */
285862306a36Sopenharmony_ci		return drbd_drain_block(peer_device, pi->size);
285962306a36Sopenharmony_ci	}
286062306a36Sopenharmony_ci
286162306a36Sopenharmony_ci	/* GFP_NOIO, because we must not cause arbitrary write-out: in a DRBD
286262306a36Sopenharmony_ci	 * "criss-cross" setup, that might cause write-out on some other DRBD,
286362306a36Sopenharmony_ci	 * which in turn might block on the other node at this very place.  */
286462306a36Sopenharmony_ci	peer_req = drbd_alloc_peer_req(peer_device, p->block_id, sector, size,
286562306a36Sopenharmony_ci			size, GFP_NOIO);
286662306a36Sopenharmony_ci	if (!peer_req) {
286762306a36Sopenharmony_ci		put_ldev(device);
286862306a36Sopenharmony_ci		return -ENOMEM;
286962306a36Sopenharmony_ci	}
287062306a36Sopenharmony_ci	peer_req->opf = REQ_OP_READ;
287162306a36Sopenharmony_ci
287262306a36Sopenharmony_ci	switch (pi->cmd) {
287362306a36Sopenharmony_ci	case P_DATA_REQUEST:
287462306a36Sopenharmony_ci		peer_req->w.cb = w_e_end_data_req;
287562306a36Sopenharmony_ci		/* application IO, don't drbd_rs_begin_io */
287662306a36Sopenharmony_ci		peer_req->flags |= EE_APPLICATION;
287762306a36Sopenharmony_ci		goto submit;
287862306a36Sopenharmony_ci
287962306a36Sopenharmony_ci	case P_RS_THIN_REQ:
288062306a36Sopenharmony_ci		/* If at some point in the future we have a smart way to
288162306a36Sopenharmony_ci		   find out if this data block is completely deallocated,
288262306a36Sopenharmony_ci		   then we would do something smarter here than reading
288362306a36Sopenharmony_ci		   the block... */
288462306a36Sopenharmony_ci		peer_req->flags |= EE_RS_THIN_REQ;
288562306a36Sopenharmony_ci		fallthrough;
288662306a36Sopenharmony_ci	case P_RS_DATA_REQUEST:
288762306a36Sopenharmony_ci		peer_req->w.cb = w_e_end_rsdata_req;
288862306a36Sopenharmony_ci		/* used in the sector offset progress display */
288962306a36Sopenharmony_ci		device->bm_resync_fo = BM_SECT_TO_BIT(sector);
289062306a36Sopenharmony_ci		break;
289162306a36Sopenharmony_ci
289262306a36Sopenharmony_ci	case P_OV_REPLY:
289362306a36Sopenharmony_ci	case P_CSUM_RS_REQUEST:
289462306a36Sopenharmony_ci		di = kmalloc(sizeof(*di) + pi->size, GFP_NOIO);
289562306a36Sopenharmony_ci		if (!di)
289662306a36Sopenharmony_ci			goto out_free_e;
289762306a36Sopenharmony_ci
289862306a36Sopenharmony_ci		di->digest_size = pi->size;
289962306a36Sopenharmony_ci		di->digest = (((char *)di)+sizeof(struct digest_info));
290062306a36Sopenharmony_ci
290162306a36Sopenharmony_ci		peer_req->digest = di;
290262306a36Sopenharmony_ci		peer_req->flags |= EE_HAS_DIGEST;
290362306a36Sopenharmony_ci
290462306a36Sopenharmony_ci		if (drbd_recv_all(peer_device->connection, di->digest, pi->size))
290562306a36Sopenharmony_ci			goto out_free_e;
290662306a36Sopenharmony_ci
290762306a36Sopenharmony_ci		if (pi->cmd == P_CSUM_RS_REQUEST) {
290862306a36Sopenharmony_ci			D_ASSERT(device, peer_device->connection->agreed_pro_version >= 89);
290962306a36Sopenharmony_ci			peer_req->w.cb = w_e_end_csum_rs_req;
291062306a36Sopenharmony_ci			/* used in the sector offset progress display */
291162306a36Sopenharmony_ci			device->bm_resync_fo = BM_SECT_TO_BIT(sector);
291262306a36Sopenharmony_ci			/* remember to report stats in drbd_resync_finished */
291362306a36Sopenharmony_ci			device->use_csums = true;
291462306a36Sopenharmony_ci		} else if (pi->cmd == P_OV_REPLY) {
291562306a36Sopenharmony_ci			/* track progress, we may need to throttle */
291662306a36Sopenharmony_ci			atomic_add(size >> 9, &device->rs_sect_in);
291762306a36Sopenharmony_ci			peer_req->w.cb = w_e_end_ov_reply;
291862306a36Sopenharmony_ci			dec_rs_pending(peer_device);
291962306a36Sopenharmony_ci			/* drbd_rs_begin_io done when we sent this request,
292062306a36Sopenharmony_ci			 * but accounting still needs to be done. */
292162306a36Sopenharmony_ci			goto submit_for_resync;
292262306a36Sopenharmony_ci		}
292362306a36Sopenharmony_ci		break;
292462306a36Sopenharmony_ci
292562306a36Sopenharmony_ci	case P_OV_REQUEST:
292662306a36Sopenharmony_ci		if (device->ov_start_sector == ~(sector_t)0 &&
292762306a36Sopenharmony_ci		    peer_device->connection->agreed_pro_version >= 90) {
292862306a36Sopenharmony_ci			unsigned long now = jiffies;
292962306a36Sopenharmony_ci			int i;
293062306a36Sopenharmony_ci			device->ov_start_sector = sector;
293162306a36Sopenharmony_ci			device->ov_position = sector;
293262306a36Sopenharmony_ci			device->ov_left = drbd_bm_bits(device) - BM_SECT_TO_BIT(sector);
293362306a36Sopenharmony_ci			device->rs_total = device->ov_left;
293462306a36Sopenharmony_ci			for (i = 0; i < DRBD_SYNC_MARKS; i++) {
293562306a36Sopenharmony_ci				device->rs_mark_left[i] = device->ov_left;
293662306a36Sopenharmony_ci				device->rs_mark_time[i] = now;
293762306a36Sopenharmony_ci			}
293862306a36Sopenharmony_ci			drbd_info(device, "Online Verify start sector: %llu\n",
293962306a36Sopenharmony_ci					(unsigned long long)sector);
294062306a36Sopenharmony_ci		}
294162306a36Sopenharmony_ci		peer_req->w.cb = w_e_end_ov_req;
294262306a36Sopenharmony_ci		break;
294362306a36Sopenharmony_ci
294462306a36Sopenharmony_ci	default:
294562306a36Sopenharmony_ci		BUG();
294662306a36Sopenharmony_ci	}
294762306a36Sopenharmony_ci
294862306a36Sopenharmony_ci	/* Throttle, drbd_rs_begin_io and submit should become asynchronous
294962306a36Sopenharmony_ci	 * wrt the receiver, but it is not as straightforward as it may seem.
295062306a36Sopenharmony_ci	 * Various places in the resync start and stop logic assume resync
295162306a36Sopenharmony_ci	 * requests are processed in order, requeuing this on the worker thread
295262306a36Sopenharmony_ci	 * introduces a bunch of new code for synchronization between threads.
295362306a36Sopenharmony_ci	 *
295462306a36Sopenharmony_ci	 * Unlimited throttling before drbd_rs_begin_io may stall the resync
295562306a36Sopenharmony_ci	 * "forever", throttling after drbd_rs_begin_io will lock that extent
295662306a36Sopenharmony_ci	 * for application writes for the same time.  For now, just throttle
295762306a36Sopenharmony_ci	 * here, where the rest of the code expects the receiver to sleep for
295862306a36Sopenharmony_ci	 * a while, anyways.
295962306a36Sopenharmony_ci	 */
296062306a36Sopenharmony_ci
296162306a36Sopenharmony_ci	/* Throttle before drbd_rs_begin_io, as that locks out application IO;
296262306a36Sopenharmony_ci	 * this defers syncer requests for some time, before letting at least
	 * one request through.  The resync controller on the receiving side
296462306a36Sopenharmony_ci	 * will adapt to the incoming rate accordingly.
296562306a36Sopenharmony_ci	 *
296662306a36Sopenharmony_ci	 * We cannot throttle here if remote is Primary/SyncTarget:
296762306a36Sopenharmony_ci	 * we would also throttle its application reads.
296862306a36Sopenharmony_ci	 * In that case, throttling is done on the SyncTarget only.
296962306a36Sopenharmony_ci	 */
297062306a36Sopenharmony_ci
297162306a36Sopenharmony_ci	/* Even though this may be a resync request, we do add to "read_ee";
297262306a36Sopenharmony_ci	 * "sync_ee" is only used for resync WRITEs.
297362306a36Sopenharmony_ci	 * Add to list early, so debugfs can find this request
297462306a36Sopenharmony_ci	 * even if we have to sleep below. */
297562306a36Sopenharmony_ci	spin_lock_irq(&device->resource->req_lock);
297662306a36Sopenharmony_ci	list_add_tail(&peer_req->w.list, &device->read_ee);
297762306a36Sopenharmony_ci	spin_unlock_irq(&device->resource->req_lock);
297862306a36Sopenharmony_ci
297962306a36Sopenharmony_ci	update_receiver_timing_details(connection, drbd_rs_should_slow_down);
298062306a36Sopenharmony_ci	if (device->state.peer != R_PRIMARY
298162306a36Sopenharmony_ci	&& drbd_rs_should_slow_down(peer_device, sector, false))
298262306a36Sopenharmony_ci		schedule_timeout_uninterruptible(HZ/10);
298362306a36Sopenharmony_ci	update_receiver_timing_details(connection, drbd_rs_begin_io);
298462306a36Sopenharmony_ci	if (drbd_rs_begin_io(device, sector))
298562306a36Sopenharmony_ci		goto out_free_e;
298662306a36Sopenharmony_ci
298762306a36Sopenharmony_cisubmit_for_resync:
298862306a36Sopenharmony_ci	atomic_add(size >> 9, &device->rs_sect_ev);
298962306a36Sopenharmony_ci
299062306a36Sopenharmony_cisubmit:
299162306a36Sopenharmony_ci	update_receiver_timing_details(connection, drbd_submit_peer_request);
299262306a36Sopenharmony_ci	inc_unacked(device);
299362306a36Sopenharmony_ci	if (drbd_submit_peer_request(peer_req) == 0)
299462306a36Sopenharmony_ci		return 0;
299562306a36Sopenharmony_ci
299662306a36Sopenharmony_ci	/* don't care for the reason here */
299762306a36Sopenharmony_ci	drbd_err(device, "submit failed, triggering re-connect\n");
299862306a36Sopenharmony_ci
299962306a36Sopenharmony_ciout_free_e:
300062306a36Sopenharmony_ci	spin_lock_irq(&device->resource->req_lock);
300162306a36Sopenharmony_ci	list_del(&peer_req->w.list);
300262306a36Sopenharmony_ci	spin_unlock_irq(&device->resource->req_lock);
300362306a36Sopenharmony_ci	/* no drbd_rs_complete_io(), we are dropping the connection anyways */
300462306a36Sopenharmony_ci
300562306a36Sopenharmony_ci	put_ldev(device);
300662306a36Sopenharmony_ci	drbd_free_peer_req(device, peer_req);
300762306a36Sopenharmony_ci	return -EIO;
300862306a36Sopenharmony_ci}
300962306a36Sopenharmony_ci
301062306a36Sopenharmony_ci/*
301162306a36Sopenharmony_ci * drbd_asb_recover_0p  -  Recover after split-brain with no remaining primaries
301262306a36Sopenharmony_ci */
301362306a36Sopenharmony_cistatic int drbd_asb_recover_0p(struct drbd_peer_device *peer_device) __must_hold(local)
301462306a36Sopenharmony_ci{
301562306a36Sopenharmony_ci	struct drbd_device *device = peer_device->device;
301662306a36Sopenharmony_ci	int self, peer, rv = -100;
301762306a36Sopenharmony_ci	unsigned long ch_self, ch_peer;
301862306a36Sopenharmony_ci	enum drbd_after_sb_p after_sb_0p;
301962306a36Sopenharmony_ci
302062306a36Sopenharmony_ci	self = device->ldev->md.uuid[UI_BITMAP] & 1;
302162306a36Sopenharmony_ci	peer = device->p_uuid[UI_BITMAP] & 1;
302262306a36Sopenharmony_ci
302362306a36Sopenharmony_ci	ch_peer = device->p_uuid[UI_SIZE];
302462306a36Sopenharmony_ci	ch_self = device->comm_bm_set;
302562306a36Sopenharmony_ci
302662306a36Sopenharmony_ci	rcu_read_lock();
302762306a36Sopenharmony_ci	after_sb_0p = rcu_dereference(peer_device->connection->net_conf)->after_sb_0p;
302862306a36Sopenharmony_ci	rcu_read_unlock();
302962306a36Sopenharmony_ci	switch (after_sb_0p) {
303062306a36Sopenharmony_ci	case ASB_CONSENSUS:
303162306a36Sopenharmony_ci	case ASB_DISCARD_SECONDARY:
303262306a36Sopenharmony_ci	case ASB_CALL_HELPER:
303362306a36Sopenharmony_ci	case ASB_VIOLENTLY:
303462306a36Sopenharmony_ci		drbd_err(device, "Configuration error.\n");
303562306a36Sopenharmony_ci		break;
303662306a36Sopenharmony_ci	case ASB_DISCONNECT:
303762306a36Sopenharmony_ci		break;
303862306a36Sopenharmony_ci	case ASB_DISCARD_YOUNGER_PRI:
303962306a36Sopenharmony_ci		if (self == 0 && peer == 1) {
304062306a36Sopenharmony_ci			rv = -1;
304162306a36Sopenharmony_ci			break;
304262306a36Sopenharmony_ci		}
304362306a36Sopenharmony_ci		if (self == 1 && peer == 0) {
304462306a36Sopenharmony_ci			rv =  1;
304562306a36Sopenharmony_ci			break;
304662306a36Sopenharmony_ci		}
304762306a36Sopenharmony_ci		fallthrough;	/* to one of the other strategies */
304862306a36Sopenharmony_ci	case ASB_DISCARD_OLDER_PRI:
304962306a36Sopenharmony_ci		if (self == 0 && peer == 1) {
305062306a36Sopenharmony_ci			rv = 1;
305162306a36Sopenharmony_ci			break;
305262306a36Sopenharmony_ci		}
305362306a36Sopenharmony_ci		if (self == 1 && peer == 0) {
305462306a36Sopenharmony_ci			rv = -1;
305562306a36Sopenharmony_ci			break;
305662306a36Sopenharmony_ci		}
305762306a36Sopenharmony_ci		/* Else fall through to one of the other strategies... */
305862306a36Sopenharmony_ci		drbd_warn(device, "Discard younger/older primary did not find a decision\n"
305962306a36Sopenharmony_ci		     "Using discard-least-changes instead\n");
306062306a36Sopenharmony_ci		fallthrough;
306162306a36Sopenharmony_ci	case ASB_DISCARD_ZERO_CHG:
306262306a36Sopenharmony_ci		if (ch_peer == 0 && ch_self == 0) {
306362306a36Sopenharmony_ci			rv = test_bit(RESOLVE_CONFLICTS, &peer_device->connection->flags)
306462306a36Sopenharmony_ci				? -1 : 1;
306562306a36Sopenharmony_ci			break;
306662306a36Sopenharmony_ci		} else {
306762306a36Sopenharmony_ci			if (ch_peer == 0) { rv =  1; break; }
306862306a36Sopenharmony_ci			if (ch_self == 0) { rv = -1; break; }
306962306a36Sopenharmony_ci		}
307062306a36Sopenharmony_ci		if (after_sb_0p == ASB_DISCARD_ZERO_CHG)
307162306a36Sopenharmony_ci			break;
307262306a36Sopenharmony_ci		fallthrough;
307362306a36Sopenharmony_ci	case ASB_DISCARD_LEAST_CHG:
307462306a36Sopenharmony_ci		if	(ch_self < ch_peer)
307562306a36Sopenharmony_ci			rv = -1;
307662306a36Sopenharmony_ci		else if (ch_self > ch_peer)
307762306a36Sopenharmony_ci			rv =  1;
307862306a36Sopenharmony_ci		else /* ( ch_self == ch_peer ) */
307962306a36Sopenharmony_ci		     /* Well, then use something else. */
308062306a36Sopenharmony_ci			rv = test_bit(RESOLVE_CONFLICTS, &peer_device->connection->flags)
308162306a36Sopenharmony_ci				? -1 : 1;
308262306a36Sopenharmony_ci		break;
308362306a36Sopenharmony_ci	case ASB_DISCARD_LOCAL:
308462306a36Sopenharmony_ci		rv = -1;
308562306a36Sopenharmony_ci		break;
308662306a36Sopenharmony_ci	case ASB_DISCARD_REMOTE:
308762306a36Sopenharmony_ci		rv =  1;
308862306a36Sopenharmony_ci	}
308962306a36Sopenharmony_ci
309062306a36Sopenharmony_ci	return rv;
309162306a36Sopenharmony_ci}
309262306a36Sopenharmony_ci
309362306a36Sopenharmony_ci/*
309462306a36Sopenharmony_ci * drbd_asb_recover_1p  -  Recover after split-brain with one remaining primary
309562306a36Sopenharmony_ci */
309662306a36Sopenharmony_cistatic int drbd_asb_recover_1p(struct drbd_peer_device *peer_device) __must_hold(local)
309762306a36Sopenharmony_ci{
309862306a36Sopenharmony_ci	struct drbd_device *device = peer_device->device;
309962306a36Sopenharmony_ci	int hg, rv = -100;
310062306a36Sopenharmony_ci	enum drbd_after_sb_p after_sb_1p;
310162306a36Sopenharmony_ci
310262306a36Sopenharmony_ci	rcu_read_lock();
310362306a36Sopenharmony_ci	after_sb_1p = rcu_dereference(peer_device->connection->net_conf)->after_sb_1p;
310462306a36Sopenharmony_ci	rcu_read_unlock();
310562306a36Sopenharmony_ci	switch (after_sb_1p) {
310662306a36Sopenharmony_ci	case ASB_DISCARD_YOUNGER_PRI:
310762306a36Sopenharmony_ci	case ASB_DISCARD_OLDER_PRI:
310862306a36Sopenharmony_ci	case ASB_DISCARD_LEAST_CHG:
310962306a36Sopenharmony_ci	case ASB_DISCARD_LOCAL:
311062306a36Sopenharmony_ci	case ASB_DISCARD_REMOTE:
311162306a36Sopenharmony_ci	case ASB_DISCARD_ZERO_CHG:
311262306a36Sopenharmony_ci		drbd_err(device, "Configuration error.\n");
311362306a36Sopenharmony_ci		break;
311462306a36Sopenharmony_ci	case ASB_DISCONNECT:
311562306a36Sopenharmony_ci		break;
311662306a36Sopenharmony_ci	case ASB_CONSENSUS:
311762306a36Sopenharmony_ci		hg = drbd_asb_recover_0p(peer_device);
311862306a36Sopenharmony_ci		if (hg == -1 && device->state.role == R_SECONDARY)
311962306a36Sopenharmony_ci			rv = hg;
312062306a36Sopenharmony_ci		if (hg == 1  && device->state.role == R_PRIMARY)
312162306a36Sopenharmony_ci			rv = hg;
312262306a36Sopenharmony_ci		break;
312362306a36Sopenharmony_ci	case ASB_VIOLENTLY:
312462306a36Sopenharmony_ci		rv = drbd_asb_recover_0p(peer_device);
312562306a36Sopenharmony_ci		break;
312662306a36Sopenharmony_ci	case ASB_DISCARD_SECONDARY:
312762306a36Sopenharmony_ci		return device->state.role == R_PRIMARY ? 1 : -1;
312862306a36Sopenharmony_ci	case ASB_CALL_HELPER:
312962306a36Sopenharmony_ci		hg = drbd_asb_recover_0p(peer_device);
313062306a36Sopenharmony_ci		if (hg == -1 && device->state.role == R_PRIMARY) {
313162306a36Sopenharmony_ci			enum drbd_state_rv rv2;
313262306a36Sopenharmony_ci
313362306a36Sopenharmony_ci			 /* drbd_change_state() does not sleep while in SS_IN_TRANSIENT_STATE,
313462306a36Sopenharmony_ci			  * we might be here in C_WF_REPORT_PARAMS which is transient.
313562306a36Sopenharmony_ci			  * we do not need to wait for the after state change work either. */
313662306a36Sopenharmony_ci			rv2 = drbd_change_state(device, CS_VERBOSE, NS(role, R_SECONDARY));
313762306a36Sopenharmony_ci			if (rv2 != SS_SUCCESS) {
313862306a36Sopenharmony_ci				drbd_khelper(device, "pri-lost-after-sb");
313962306a36Sopenharmony_ci			} else {
314062306a36Sopenharmony_ci				drbd_warn(device, "Successfully gave up primary role.\n");
314162306a36Sopenharmony_ci				rv = hg;
314262306a36Sopenharmony_ci			}
314362306a36Sopenharmony_ci		} else
314462306a36Sopenharmony_ci			rv = hg;
314562306a36Sopenharmony_ci	}
314662306a36Sopenharmony_ci
314762306a36Sopenharmony_ci	return rv;
314862306a36Sopenharmony_ci}
314962306a36Sopenharmony_ci
315062306a36Sopenharmony_ci/*
315162306a36Sopenharmony_ci * drbd_asb_recover_2p  -  Recover after split-brain with two remaining primaries
315262306a36Sopenharmony_ci */
315362306a36Sopenharmony_cistatic int drbd_asb_recover_2p(struct drbd_peer_device *peer_device) __must_hold(local)
315462306a36Sopenharmony_ci{
315562306a36Sopenharmony_ci	struct drbd_device *device = peer_device->device;
315662306a36Sopenharmony_ci	int hg, rv = -100;
315762306a36Sopenharmony_ci	enum drbd_after_sb_p after_sb_2p;
315862306a36Sopenharmony_ci
315962306a36Sopenharmony_ci	rcu_read_lock();
316062306a36Sopenharmony_ci	after_sb_2p = rcu_dereference(peer_device->connection->net_conf)->after_sb_2p;
316162306a36Sopenharmony_ci	rcu_read_unlock();
316262306a36Sopenharmony_ci	switch (after_sb_2p) {
316362306a36Sopenharmony_ci	case ASB_DISCARD_YOUNGER_PRI:
316462306a36Sopenharmony_ci	case ASB_DISCARD_OLDER_PRI:
316562306a36Sopenharmony_ci	case ASB_DISCARD_LEAST_CHG:
316662306a36Sopenharmony_ci	case ASB_DISCARD_LOCAL:
316762306a36Sopenharmony_ci	case ASB_DISCARD_REMOTE:
316862306a36Sopenharmony_ci	case ASB_CONSENSUS:
316962306a36Sopenharmony_ci	case ASB_DISCARD_SECONDARY:
317062306a36Sopenharmony_ci	case ASB_DISCARD_ZERO_CHG:
317162306a36Sopenharmony_ci		drbd_err(device, "Configuration error.\n");
317262306a36Sopenharmony_ci		break;
317362306a36Sopenharmony_ci	case ASB_VIOLENTLY:
317462306a36Sopenharmony_ci		rv = drbd_asb_recover_0p(peer_device);
317562306a36Sopenharmony_ci		break;
317662306a36Sopenharmony_ci	case ASB_DISCONNECT:
317762306a36Sopenharmony_ci		break;
317862306a36Sopenharmony_ci	case ASB_CALL_HELPER:
317962306a36Sopenharmony_ci		hg = drbd_asb_recover_0p(peer_device);
318062306a36Sopenharmony_ci		if (hg == -1) {
318162306a36Sopenharmony_ci			enum drbd_state_rv rv2;
318262306a36Sopenharmony_ci
318362306a36Sopenharmony_ci			 /* drbd_change_state() does not sleep while in SS_IN_TRANSIENT_STATE,
318462306a36Sopenharmony_ci			  * we might be here in C_WF_REPORT_PARAMS which is transient.
318562306a36Sopenharmony_ci			  * we do not need to wait for the after state change work either. */
318662306a36Sopenharmony_ci			rv2 = drbd_change_state(device, CS_VERBOSE, NS(role, R_SECONDARY));
318762306a36Sopenharmony_ci			if (rv2 != SS_SUCCESS) {
318862306a36Sopenharmony_ci				drbd_khelper(device, "pri-lost-after-sb");
318962306a36Sopenharmony_ci			} else {
319062306a36Sopenharmony_ci				drbd_warn(device, "Successfully gave up primary role.\n");
319162306a36Sopenharmony_ci				rv = hg;
319262306a36Sopenharmony_ci			}
319362306a36Sopenharmony_ci		} else
319462306a36Sopenharmony_ci			rv = hg;
319562306a36Sopenharmony_ci	}
319662306a36Sopenharmony_ci
319762306a36Sopenharmony_ci	return rv;
319862306a36Sopenharmony_ci}
319962306a36Sopenharmony_ci
320062306a36Sopenharmony_cistatic void drbd_uuid_dump(struct drbd_device *device, char *text, u64 *uuid,
320162306a36Sopenharmony_ci			   u64 bits, u64 flags)
320262306a36Sopenharmony_ci{
320362306a36Sopenharmony_ci	if (!uuid) {
320462306a36Sopenharmony_ci		drbd_info(device, "%s uuid info vanished while I was looking!\n", text);
320562306a36Sopenharmony_ci		return;
320662306a36Sopenharmony_ci	}
320762306a36Sopenharmony_ci	drbd_info(device, "%s %016llX:%016llX:%016llX:%016llX bits:%llu flags:%llX\n",
320862306a36Sopenharmony_ci	     text,
320962306a36Sopenharmony_ci	     (unsigned long long)uuid[UI_CURRENT],
321062306a36Sopenharmony_ci	     (unsigned long long)uuid[UI_BITMAP],
321162306a36Sopenharmony_ci	     (unsigned long long)uuid[UI_HISTORY_START],
321262306a36Sopenharmony_ci	     (unsigned long long)uuid[UI_HISTORY_END],
321362306a36Sopenharmony_ci	     (unsigned long long)bits,
321462306a36Sopenharmony_ci	     (unsigned long long)flags);
321562306a36Sopenharmony_ci}
321662306a36Sopenharmony_ci
321762306a36Sopenharmony_ci/*
321862306a36Sopenharmony_ci  100	after split brain try auto recover
321962306a36Sopenharmony_ci    2	C_SYNC_SOURCE set BitMap
322062306a36Sopenharmony_ci    1	C_SYNC_SOURCE use BitMap
322162306a36Sopenharmony_ci    0	no Sync
322262306a36Sopenharmony_ci   -1	C_SYNC_TARGET use BitMap
322362306a36Sopenharmony_ci   -2	C_SYNC_TARGET set BitMap
322462306a36Sopenharmony_ci -100	after split brain, disconnect
322562306a36Sopenharmony_ci-1000	unrelated data
322662306a36Sopenharmony_ci-1091   requires proto 91
322762306a36Sopenharmony_ci-1096   requires proto 96
322862306a36Sopenharmony_ci */
322962306a36Sopenharmony_ci
323062306a36Sopenharmony_cistatic int drbd_uuid_compare(struct drbd_peer_device *const peer_device,
323162306a36Sopenharmony_ci		enum drbd_role const peer_role, int *rule_nr) __must_hold(local)
323262306a36Sopenharmony_ci{
323362306a36Sopenharmony_ci	struct drbd_connection *const connection = peer_device->connection;
323462306a36Sopenharmony_ci	struct drbd_device *device = peer_device->device;
323562306a36Sopenharmony_ci	u64 self, peer;
323662306a36Sopenharmony_ci	int i, j;
323762306a36Sopenharmony_ci
323862306a36Sopenharmony_ci	self = device->ldev->md.uuid[UI_CURRENT] & ~((u64)1);
323962306a36Sopenharmony_ci	peer = device->p_uuid[UI_CURRENT] & ~((u64)1);
324062306a36Sopenharmony_ci
324162306a36Sopenharmony_ci	*rule_nr = 10;
324262306a36Sopenharmony_ci	if (self == UUID_JUST_CREATED && peer == UUID_JUST_CREATED)
324362306a36Sopenharmony_ci		return 0;
324462306a36Sopenharmony_ci
324562306a36Sopenharmony_ci	*rule_nr = 20;
324662306a36Sopenharmony_ci	if ((self == UUID_JUST_CREATED || self == (u64)0) &&
324762306a36Sopenharmony_ci	     peer != UUID_JUST_CREATED)
324862306a36Sopenharmony_ci		return -2;
324962306a36Sopenharmony_ci
325062306a36Sopenharmony_ci	*rule_nr = 30;
325162306a36Sopenharmony_ci	if (self != UUID_JUST_CREATED &&
325262306a36Sopenharmony_ci	    (peer == UUID_JUST_CREATED || peer == (u64)0))
325362306a36Sopenharmony_ci		return 2;
325462306a36Sopenharmony_ci
325562306a36Sopenharmony_ci	if (self == peer) {
325662306a36Sopenharmony_ci		int rct, dc; /* roles at crash time */
325762306a36Sopenharmony_ci
325862306a36Sopenharmony_ci		if (device->p_uuid[UI_BITMAP] == (u64)0 && device->ldev->md.uuid[UI_BITMAP] != (u64)0) {
325962306a36Sopenharmony_ci
326062306a36Sopenharmony_ci			if (connection->agreed_pro_version < 91)
326162306a36Sopenharmony_ci				return -1091;
326262306a36Sopenharmony_ci
326362306a36Sopenharmony_ci			if ((device->ldev->md.uuid[UI_BITMAP] & ~((u64)1)) == (device->p_uuid[UI_HISTORY_START] & ~((u64)1)) &&
326462306a36Sopenharmony_ci			    (device->ldev->md.uuid[UI_HISTORY_START] & ~((u64)1)) == (device->p_uuid[UI_HISTORY_START + 1] & ~((u64)1))) {
326562306a36Sopenharmony_ci				drbd_info(device, "was SyncSource, missed the resync finished event, corrected myself:\n");
326662306a36Sopenharmony_ci				drbd_uuid_move_history(device);
326762306a36Sopenharmony_ci				device->ldev->md.uuid[UI_HISTORY_START] = device->ldev->md.uuid[UI_BITMAP];
326862306a36Sopenharmony_ci				device->ldev->md.uuid[UI_BITMAP] = 0;
326962306a36Sopenharmony_ci
327062306a36Sopenharmony_ci				drbd_uuid_dump(device, "self", device->ldev->md.uuid,
327162306a36Sopenharmony_ci					       device->state.disk >= D_NEGOTIATING ? drbd_bm_total_weight(device) : 0, 0);
327262306a36Sopenharmony_ci				*rule_nr = 34;
327362306a36Sopenharmony_ci			} else {
327462306a36Sopenharmony_ci				drbd_info(device, "was SyncSource (peer failed to write sync_uuid)\n");
327562306a36Sopenharmony_ci				*rule_nr = 36;
327662306a36Sopenharmony_ci			}
327762306a36Sopenharmony_ci
327862306a36Sopenharmony_ci			return 1;
327962306a36Sopenharmony_ci		}
328062306a36Sopenharmony_ci
328162306a36Sopenharmony_ci		if (device->ldev->md.uuid[UI_BITMAP] == (u64)0 && device->p_uuid[UI_BITMAP] != (u64)0) {
328262306a36Sopenharmony_ci
328362306a36Sopenharmony_ci			if (connection->agreed_pro_version < 91)
328462306a36Sopenharmony_ci				return -1091;
328562306a36Sopenharmony_ci
328662306a36Sopenharmony_ci			if ((device->ldev->md.uuid[UI_HISTORY_START] & ~((u64)1)) == (device->p_uuid[UI_BITMAP] & ~((u64)1)) &&
328762306a36Sopenharmony_ci			    (device->ldev->md.uuid[UI_HISTORY_START + 1] & ~((u64)1)) == (device->p_uuid[UI_HISTORY_START] & ~((u64)1))) {
328862306a36Sopenharmony_ci				drbd_info(device, "was SyncTarget, peer missed the resync finished event, corrected peer:\n");
328962306a36Sopenharmony_ci
329062306a36Sopenharmony_ci				device->p_uuid[UI_HISTORY_START + 1] = device->p_uuid[UI_HISTORY_START];
329162306a36Sopenharmony_ci				device->p_uuid[UI_HISTORY_START] = device->p_uuid[UI_BITMAP];
329262306a36Sopenharmony_ci				device->p_uuid[UI_BITMAP] = 0UL;
329362306a36Sopenharmony_ci
329462306a36Sopenharmony_ci				drbd_uuid_dump(device, "peer", device->p_uuid, device->p_uuid[UI_SIZE], device->p_uuid[UI_FLAGS]);
329562306a36Sopenharmony_ci				*rule_nr = 35;
329662306a36Sopenharmony_ci			} else {
329762306a36Sopenharmony_ci				drbd_info(device, "was SyncTarget (failed to write sync_uuid)\n");
329862306a36Sopenharmony_ci				*rule_nr = 37;
329962306a36Sopenharmony_ci			}
330062306a36Sopenharmony_ci
330162306a36Sopenharmony_ci			return -1;
330262306a36Sopenharmony_ci		}
330362306a36Sopenharmony_ci
330462306a36Sopenharmony_ci		/* Common power [off|failure] */
330562306a36Sopenharmony_ci		rct = (test_bit(CRASHED_PRIMARY, &device->flags) ? 1 : 0) +
330662306a36Sopenharmony_ci			(device->p_uuid[UI_FLAGS] & 2);
330762306a36Sopenharmony_ci		/* lowest bit is set when we were primary,
330862306a36Sopenharmony_ci		 * next bit (weight 2) is set when peer was primary */
330962306a36Sopenharmony_ci		*rule_nr = 40;
331062306a36Sopenharmony_ci
331162306a36Sopenharmony_ci		/* Neither has the "crashed primary" flag set,
331262306a36Sopenharmony_ci		 * only a replication link hickup. */
331362306a36Sopenharmony_ci		if (rct == 0)
331462306a36Sopenharmony_ci			return 0;
331562306a36Sopenharmony_ci
331662306a36Sopenharmony_ci		/* Current UUID equal and no bitmap uuid; does not necessarily
331762306a36Sopenharmony_ci		 * mean this was a "simultaneous hard crash", maybe IO was
331862306a36Sopenharmony_ci		 * frozen, so no UUID-bump happened.
331962306a36Sopenharmony_ci		 * This is a protocol change, overload DRBD_FF_WSAME as flag
332062306a36Sopenharmony_ci		 * for "new-enough" peer DRBD version. */
332162306a36Sopenharmony_ci		if (device->state.role == R_PRIMARY || peer_role == R_PRIMARY) {
332262306a36Sopenharmony_ci			*rule_nr = 41;
332362306a36Sopenharmony_ci			if (!(connection->agreed_features & DRBD_FF_WSAME)) {
332462306a36Sopenharmony_ci				drbd_warn(peer_device, "Equivalent unrotated UUIDs, but current primary present.\n");
332562306a36Sopenharmony_ci				return -(0x10000 | PRO_VERSION_MAX | (DRBD_FF_WSAME << 8));
332662306a36Sopenharmony_ci			}
332762306a36Sopenharmony_ci			if (device->state.role == R_PRIMARY && peer_role == R_PRIMARY) {
332862306a36Sopenharmony_ci				/* At least one has the "crashed primary" bit set,
332962306a36Sopenharmony_ci				 * both are primary now, but neither has rotated its UUIDs?
333062306a36Sopenharmony_ci				 * "Can not happen." */
333162306a36Sopenharmony_ci				drbd_err(peer_device, "Equivalent unrotated UUIDs, but both are primary. Can not resolve this.\n");
333262306a36Sopenharmony_ci				return -100;
333362306a36Sopenharmony_ci			}
333462306a36Sopenharmony_ci			if (device->state.role == R_PRIMARY)
333562306a36Sopenharmony_ci				return 1;
333662306a36Sopenharmony_ci			return -1;
333762306a36Sopenharmony_ci		}
333862306a36Sopenharmony_ci
333962306a36Sopenharmony_ci		/* Both are secondary.
334062306a36Sopenharmony_ci		 * Really looks like recovery from simultaneous hard crash.
334162306a36Sopenharmony_ci		 * Check which had been primary before, and arbitrate. */
334262306a36Sopenharmony_ci		switch (rct) {
334362306a36Sopenharmony_ci		case 0: /* !self_pri && !peer_pri */ return 0; /* already handled */
334462306a36Sopenharmony_ci		case 1: /*  self_pri && !peer_pri */ return 1;
334562306a36Sopenharmony_ci		case 2: /* !self_pri &&  peer_pri */ return -1;
334662306a36Sopenharmony_ci		case 3: /*  self_pri &&  peer_pri */
334762306a36Sopenharmony_ci			dc = test_bit(RESOLVE_CONFLICTS, &connection->flags);
334862306a36Sopenharmony_ci			return dc ? -1 : 1;
334962306a36Sopenharmony_ci		}
335062306a36Sopenharmony_ci	}
335162306a36Sopenharmony_ci
335262306a36Sopenharmony_ci	*rule_nr = 50;
335362306a36Sopenharmony_ci	peer = device->p_uuid[UI_BITMAP] & ~((u64)1);
335462306a36Sopenharmony_ci	if (self == peer)
335562306a36Sopenharmony_ci		return -1;
335662306a36Sopenharmony_ci
335762306a36Sopenharmony_ci	*rule_nr = 51;
335862306a36Sopenharmony_ci	peer = device->p_uuid[UI_HISTORY_START] & ~((u64)1);
335962306a36Sopenharmony_ci	if (self == peer) {
336062306a36Sopenharmony_ci		if (connection->agreed_pro_version < 96 ?
336162306a36Sopenharmony_ci		    (device->ldev->md.uuid[UI_HISTORY_START] & ~((u64)1)) ==
336262306a36Sopenharmony_ci		    (device->p_uuid[UI_HISTORY_START + 1] & ~((u64)1)) :
336362306a36Sopenharmony_ci		    peer + UUID_NEW_BM_OFFSET == (device->p_uuid[UI_BITMAP] & ~((u64)1))) {
336462306a36Sopenharmony_ci			/* The last P_SYNC_UUID did not get though. Undo the last start of
336562306a36Sopenharmony_ci			   resync as sync source modifications of the peer's UUIDs. */
336662306a36Sopenharmony_ci
336762306a36Sopenharmony_ci			if (connection->agreed_pro_version < 91)
336862306a36Sopenharmony_ci				return -1091;
336962306a36Sopenharmony_ci
337062306a36Sopenharmony_ci			device->p_uuid[UI_BITMAP] = device->p_uuid[UI_HISTORY_START];
337162306a36Sopenharmony_ci			device->p_uuid[UI_HISTORY_START] = device->p_uuid[UI_HISTORY_START + 1];
337262306a36Sopenharmony_ci
337362306a36Sopenharmony_ci			drbd_info(device, "Lost last syncUUID packet, corrected:\n");
337462306a36Sopenharmony_ci			drbd_uuid_dump(device, "peer", device->p_uuid, device->p_uuid[UI_SIZE], device->p_uuid[UI_FLAGS]);
337562306a36Sopenharmony_ci
337662306a36Sopenharmony_ci			return -1;
337762306a36Sopenharmony_ci		}
337862306a36Sopenharmony_ci	}
337962306a36Sopenharmony_ci
338062306a36Sopenharmony_ci	*rule_nr = 60;
338162306a36Sopenharmony_ci	self = device->ldev->md.uuid[UI_CURRENT] & ~((u64)1);
338262306a36Sopenharmony_ci	for (i = UI_HISTORY_START; i <= UI_HISTORY_END; i++) {
338362306a36Sopenharmony_ci		peer = device->p_uuid[i] & ~((u64)1);
338462306a36Sopenharmony_ci		if (self == peer)
338562306a36Sopenharmony_ci			return -2;
338662306a36Sopenharmony_ci	}
338762306a36Sopenharmony_ci
338862306a36Sopenharmony_ci	*rule_nr = 70;
338962306a36Sopenharmony_ci	self = device->ldev->md.uuid[UI_BITMAP] & ~((u64)1);
339062306a36Sopenharmony_ci	peer = device->p_uuid[UI_CURRENT] & ~((u64)1);
339162306a36Sopenharmony_ci	if (self == peer)
339262306a36Sopenharmony_ci		return 1;
339362306a36Sopenharmony_ci
339462306a36Sopenharmony_ci	*rule_nr = 71;
339562306a36Sopenharmony_ci	self = device->ldev->md.uuid[UI_HISTORY_START] & ~((u64)1);
339662306a36Sopenharmony_ci	if (self == peer) {
339762306a36Sopenharmony_ci		if (connection->agreed_pro_version < 96 ?
339862306a36Sopenharmony_ci		    (device->ldev->md.uuid[UI_HISTORY_START + 1] & ~((u64)1)) ==
339962306a36Sopenharmony_ci		    (device->p_uuid[UI_HISTORY_START] & ~((u64)1)) :
340062306a36Sopenharmony_ci		    self + UUID_NEW_BM_OFFSET == (device->ldev->md.uuid[UI_BITMAP] & ~((u64)1))) {
340162306a36Sopenharmony_ci			/* The last P_SYNC_UUID did not get though. Undo the last start of
340262306a36Sopenharmony_ci			   resync as sync source modifications of our UUIDs. */
340362306a36Sopenharmony_ci
340462306a36Sopenharmony_ci			if (connection->agreed_pro_version < 91)
340562306a36Sopenharmony_ci				return -1091;
340662306a36Sopenharmony_ci
340762306a36Sopenharmony_ci			__drbd_uuid_set(device, UI_BITMAP, device->ldev->md.uuid[UI_HISTORY_START]);
340862306a36Sopenharmony_ci			__drbd_uuid_set(device, UI_HISTORY_START, device->ldev->md.uuid[UI_HISTORY_START + 1]);
340962306a36Sopenharmony_ci
341062306a36Sopenharmony_ci			drbd_info(device, "Last syncUUID did not get through, corrected:\n");
341162306a36Sopenharmony_ci			drbd_uuid_dump(device, "self", device->ldev->md.uuid,
341262306a36Sopenharmony_ci				       device->state.disk >= D_NEGOTIATING ? drbd_bm_total_weight(device) : 0, 0);
341362306a36Sopenharmony_ci
341462306a36Sopenharmony_ci			return 1;
341562306a36Sopenharmony_ci		}
341662306a36Sopenharmony_ci	}
341762306a36Sopenharmony_ci
341862306a36Sopenharmony_ci
341962306a36Sopenharmony_ci	*rule_nr = 80;
342062306a36Sopenharmony_ci	peer = device->p_uuid[UI_CURRENT] & ~((u64)1);
342162306a36Sopenharmony_ci	for (i = UI_HISTORY_START; i <= UI_HISTORY_END; i++) {
342262306a36Sopenharmony_ci		self = device->ldev->md.uuid[i] & ~((u64)1);
342362306a36Sopenharmony_ci		if (self == peer)
342462306a36Sopenharmony_ci			return 2;
342562306a36Sopenharmony_ci	}
342662306a36Sopenharmony_ci
342762306a36Sopenharmony_ci	*rule_nr = 90;
342862306a36Sopenharmony_ci	self = device->ldev->md.uuid[UI_BITMAP] & ~((u64)1);
342962306a36Sopenharmony_ci	peer = device->p_uuid[UI_BITMAP] & ~((u64)1);
343062306a36Sopenharmony_ci	if (self == peer && self != ((u64)0))
343162306a36Sopenharmony_ci		return 100;
343262306a36Sopenharmony_ci
343362306a36Sopenharmony_ci	*rule_nr = 100;
343462306a36Sopenharmony_ci	for (i = UI_HISTORY_START; i <= UI_HISTORY_END; i++) {
343562306a36Sopenharmony_ci		self = device->ldev->md.uuid[i] & ~((u64)1);
343662306a36Sopenharmony_ci		for (j = UI_HISTORY_START; j <= UI_HISTORY_END; j++) {
343762306a36Sopenharmony_ci			peer = device->p_uuid[j] & ~((u64)1);
343862306a36Sopenharmony_ci			if (self == peer)
343962306a36Sopenharmony_ci				return -100;
344062306a36Sopenharmony_ci		}
344162306a36Sopenharmony_ci	}
344262306a36Sopenharmony_ci
344362306a36Sopenharmony_ci	return -1000;
344462306a36Sopenharmony_ci}
344562306a36Sopenharmony_ci
/* drbd_sync_handshake() returns the new conn state on success, or
   C_MASK on failure.
 */
344962306a36Sopenharmony_cistatic enum drbd_conns drbd_sync_handshake(struct drbd_peer_device *peer_device,
345062306a36Sopenharmony_ci					   enum drbd_role peer_role,
345162306a36Sopenharmony_ci					   enum drbd_disk_state peer_disk) __must_hold(local)
345262306a36Sopenharmony_ci{
345362306a36Sopenharmony_ci	struct drbd_device *device = peer_device->device;
345462306a36Sopenharmony_ci	enum drbd_conns rv = C_MASK;
345562306a36Sopenharmony_ci	enum drbd_disk_state mydisk;
345662306a36Sopenharmony_ci	struct net_conf *nc;
345762306a36Sopenharmony_ci	int hg, rule_nr, rr_conflict, tentative, always_asbp;
345862306a36Sopenharmony_ci
345962306a36Sopenharmony_ci	mydisk = device->state.disk;
346062306a36Sopenharmony_ci	if (mydisk == D_NEGOTIATING)
346162306a36Sopenharmony_ci		mydisk = device->new_state_tmp.disk;
346262306a36Sopenharmony_ci
346362306a36Sopenharmony_ci	drbd_info(device, "drbd_sync_handshake:\n");
346462306a36Sopenharmony_ci
346562306a36Sopenharmony_ci	spin_lock_irq(&device->ldev->md.uuid_lock);
346662306a36Sopenharmony_ci	drbd_uuid_dump(device, "self", device->ldev->md.uuid, device->comm_bm_set, 0);
346762306a36Sopenharmony_ci	drbd_uuid_dump(device, "peer", device->p_uuid,
346862306a36Sopenharmony_ci		       device->p_uuid[UI_SIZE], device->p_uuid[UI_FLAGS]);
346962306a36Sopenharmony_ci
347062306a36Sopenharmony_ci	hg = drbd_uuid_compare(peer_device, peer_role, &rule_nr);
347162306a36Sopenharmony_ci	spin_unlock_irq(&device->ldev->md.uuid_lock);
347262306a36Sopenharmony_ci
347362306a36Sopenharmony_ci	drbd_info(device, "uuid_compare()=%d by rule %d\n", hg, rule_nr);
347462306a36Sopenharmony_ci
347562306a36Sopenharmony_ci	if (hg == -1000) {
347662306a36Sopenharmony_ci		drbd_alert(device, "Unrelated data, aborting!\n");
347762306a36Sopenharmony_ci		return C_MASK;
347862306a36Sopenharmony_ci	}
347962306a36Sopenharmony_ci	if (hg < -0x10000) {
348062306a36Sopenharmony_ci		int proto, fflags;
348162306a36Sopenharmony_ci		hg = -hg;
348262306a36Sopenharmony_ci		proto = hg & 0xff;
348362306a36Sopenharmony_ci		fflags = (hg >> 8) & 0xff;
348462306a36Sopenharmony_ci		drbd_alert(device, "To resolve this both sides have to support at least protocol %d and feature flags 0x%x\n",
348562306a36Sopenharmony_ci					proto, fflags);
348662306a36Sopenharmony_ci		return C_MASK;
348762306a36Sopenharmony_ci	}
348862306a36Sopenharmony_ci	if (hg < -1000) {
348962306a36Sopenharmony_ci		drbd_alert(device, "To resolve this both sides have to support at least protocol %d\n", -hg - 1000);
349062306a36Sopenharmony_ci		return C_MASK;
349162306a36Sopenharmony_ci	}
349262306a36Sopenharmony_ci
349362306a36Sopenharmony_ci	if    ((mydisk == D_INCONSISTENT && peer_disk > D_INCONSISTENT) ||
349462306a36Sopenharmony_ci	    (peer_disk == D_INCONSISTENT && mydisk    > D_INCONSISTENT)) {
349562306a36Sopenharmony_ci		int f = (hg == -100) || abs(hg) == 2;
349662306a36Sopenharmony_ci		hg = mydisk > D_INCONSISTENT ? 1 : -1;
349762306a36Sopenharmony_ci		if (f)
349862306a36Sopenharmony_ci			hg = hg*2;
349962306a36Sopenharmony_ci		drbd_info(device, "Becoming sync %s due to disk states.\n",
350062306a36Sopenharmony_ci		     hg > 0 ? "source" : "target");
350162306a36Sopenharmony_ci	}
350262306a36Sopenharmony_ci
350362306a36Sopenharmony_ci	if (abs(hg) == 100)
350462306a36Sopenharmony_ci		drbd_khelper(device, "initial-split-brain");
350562306a36Sopenharmony_ci
350662306a36Sopenharmony_ci	rcu_read_lock();
350762306a36Sopenharmony_ci	nc = rcu_dereference(peer_device->connection->net_conf);
350862306a36Sopenharmony_ci	always_asbp = nc->always_asbp;
350962306a36Sopenharmony_ci	rr_conflict = nc->rr_conflict;
351062306a36Sopenharmony_ci	tentative = nc->tentative;
351162306a36Sopenharmony_ci	rcu_read_unlock();
351262306a36Sopenharmony_ci
351362306a36Sopenharmony_ci	if (hg == 100 || (hg == -100 && always_asbp)) {
351462306a36Sopenharmony_ci		int pcount = (device->state.role == R_PRIMARY)
351562306a36Sopenharmony_ci			   + (peer_role == R_PRIMARY);
351662306a36Sopenharmony_ci		int forced = (hg == -100);
351762306a36Sopenharmony_ci
351862306a36Sopenharmony_ci		switch (pcount) {
351962306a36Sopenharmony_ci		case 0:
352062306a36Sopenharmony_ci			hg = drbd_asb_recover_0p(peer_device);
352162306a36Sopenharmony_ci			break;
352262306a36Sopenharmony_ci		case 1:
352362306a36Sopenharmony_ci			hg = drbd_asb_recover_1p(peer_device);
352462306a36Sopenharmony_ci			break;
352562306a36Sopenharmony_ci		case 2:
352662306a36Sopenharmony_ci			hg = drbd_asb_recover_2p(peer_device);
352762306a36Sopenharmony_ci			break;
352862306a36Sopenharmony_ci		}
352962306a36Sopenharmony_ci		if (abs(hg) < 100) {
353062306a36Sopenharmony_ci			drbd_warn(device, "Split-Brain detected, %d primaries, "
353162306a36Sopenharmony_ci			     "automatically solved. Sync from %s node\n",
353262306a36Sopenharmony_ci			     pcount, (hg < 0) ? "peer" : "this");
353362306a36Sopenharmony_ci			if (forced) {
353462306a36Sopenharmony_ci				drbd_warn(device, "Doing a full sync, since"
353562306a36Sopenharmony_ci				     " UUIDs where ambiguous.\n");
353662306a36Sopenharmony_ci				hg = hg*2;
353762306a36Sopenharmony_ci			}
353862306a36Sopenharmony_ci		}
353962306a36Sopenharmony_ci	}
354062306a36Sopenharmony_ci
354162306a36Sopenharmony_ci	if (hg == -100) {
354262306a36Sopenharmony_ci		if (test_bit(DISCARD_MY_DATA, &device->flags) && !(device->p_uuid[UI_FLAGS]&1))
354362306a36Sopenharmony_ci			hg = -1;
354462306a36Sopenharmony_ci		if (!test_bit(DISCARD_MY_DATA, &device->flags) && (device->p_uuid[UI_FLAGS]&1))
354562306a36Sopenharmony_ci			hg = 1;
354662306a36Sopenharmony_ci
354762306a36Sopenharmony_ci		if (abs(hg) < 100)
354862306a36Sopenharmony_ci			drbd_warn(device, "Split-Brain detected, manually solved. "
354962306a36Sopenharmony_ci			     "Sync from %s node\n",
355062306a36Sopenharmony_ci			     (hg < 0) ? "peer" : "this");
355162306a36Sopenharmony_ci	}
355262306a36Sopenharmony_ci
355362306a36Sopenharmony_ci	if (hg == -100) {
355462306a36Sopenharmony_ci		/* FIXME this log message is not correct if we end up here
355562306a36Sopenharmony_ci		 * after an attempted attach on a diskless node.
355662306a36Sopenharmony_ci		 * We just refuse to attach -- well, we drop the "connection"
355762306a36Sopenharmony_ci		 * to that disk, in a way... */
355862306a36Sopenharmony_ci		drbd_alert(device, "Split-Brain detected but unresolved, dropping connection!\n");
355962306a36Sopenharmony_ci		drbd_khelper(device, "split-brain");
356062306a36Sopenharmony_ci		return C_MASK;
356162306a36Sopenharmony_ci	}
356262306a36Sopenharmony_ci
356362306a36Sopenharmony_ci	if (hg > 0 && mydisk <= D_INCONSISTENT) {
356462306a36Sopenharmony_ci		drbd_err(device, "I shall become SyncSource, but I am inconsistent!\n");
356562306a36Sopenharmony_ci		return C_MASK;
356662306a36Sopenharmony_ci	}
356762306a36Sopenharmony_ci
356862306a36Sopenharmony_ci	if (hg < 0 && /* by intention we do not use mydisk here. */
356962306a36Sopenharmony_ci	    device->state.role == R_PRIMARY && device->state.disk >= D_CONSISTENT) {
357062306a36Sopenharmony_ci		switch (rr_conflict) {
357162306a36Sopenharmony_ci		case ASB_CALL_HELPER:
357262306a36Sopenharmony_ci			drbd_khelper(device, "pri-lost");
357362306a36Sopenharmony_ci			fallthrough;
357462306a36Sopenharmony_ci		case ASB_DISCONNECT:
357562306a36Sopenharmony_ci			drbd_err(device, "I shall become SyncTarget, but I am primary!\n");
357662306a36Sopenharmony_ci			return C_MASK;
357762306a36Sopenharmony_ci		case ASB_VIOLENTLY:
357862306a36Sopenharmony_ci			drbd_warn(device, "Becoming SyncTarget, violating the stable-data"
357962306a36Sopenharmony_ci			     "assumption\n");
358062306a36Sopenharmony_ci		}
358162306a36Sopenharmony_ci	}
358262306a36Sopenharmony_ci
358362306a36Sopenharmony_ci	if (tentative || test_bit(CONN_DRY_RUN, &peer_device->connection->flags)) {
358462306a36Sopenharmony_ci		if (hg == 0)
358562306a36Sopenharmony_ci			drbd_info(device, "dry-run connect: No resync, would become Connected immediately.\n");
358662306a36Sopenharmony_ci		else
358762306a36Sopenharmony_ci			drbd_info(device, "dry-run connect: Would become %s, doing a %s resync.",
358862306a36Sopenharmony_ci				 drbd_conn_str(hg > 0 ? C_SYNC_SOURCE : C_SYNC_TARGET),
358962306a36Sopenharmony_ci				 abs(hg) >= 2 ? "full" : "bit-map based");
359062306a36Sopenharmony_ci		return C_MASK;
359162306a36Sopenharmony_ci	}
359262306a36Sopenharmony_ci
359362306a36Sopenharmony_ci	if (abs(hg) >= 2) {
359462306a36Sopenharmony_ci		drbd_info(device, "Writing the whole bitmap, full sync required after drbd_sync_handshake.\n");
359562306a36Sopenharmony_ci		if (drbd_bitmap_io(device, &drbd_bmio_set_n_write, "set_n_write from sync_handshake",
359662306a36Sopenharmony_ci					BM_LOCKED_SET_ALLOWED, NULL))
359762306a36Sopenharmony_ci			return C_MASK;
359862306a36Sopenharmony_ci	}
359962306a36Sopenharmony_ci
360062306a36Sopenharmony_ci	if (hg > 0) { /* become sync source. */
360162306a36Sopenharmony_ci		rv = C_WF_BITMAP_S;
360262306a36Sopenharmony_ci	} else if (hg < 0) { /* become sync target */
360362306a36Sopenharmony_ci		rv = C_WF_BITMAP_T;
360462306a36Sopenharmony_ci	} else {
360562306a36Sopenharmony_ci		rv = C_CONNECTED;
360662306a36Sopenharmony_ci		if (drbd_bm_total_weight(device)) {
360762306a36Sopenharmony_ci			drbd_info(device, "No resync, but %lu bits in bitmap!\n",
360862306a36Sopenharmony_ci			     drbd_bm_total_weight(device));
360962306a36Sopenharmony_ci		}
361062306a36Sopenharmony_ci	}
361162306a36Sopenharmony_ci
361262306a36Sopenharmony_ci	return rv;
361362306a36Sopenharmony_ci}
361462306a36Sopenharmony_ci
361562306a36Sopenharmony_cistatic enum drbd_after_sb_p convert_after_sb(enum drbd_after_sb_p peer)
361662306a36Sopenharmony_ci{
361762306a36Sopenharmony_ci	/* ASB_DISCARD_REMOTE - ASB_DISCARD_LOCAL is valid */
361862306a36Sopenharmony_ci	if (peer == ASB_DISCARD_REMOTE)
361962306a36Sopenharmony_ci		return ASB_DISCARD_LOCAL;
362062306a36Sopenharmony_ci
362162306a36Sopenharmony_ci	/* any other things with ASB_DISCARD_REMOTE or ASB_DISCARD_LOCAL are invalid */
362262306a36Sopenharmony_ci	if (peer == ASB_DISCARD_LOCAL)
362362306a36Sopenharmony_ci		return ASB_DISCARD_REMOTE;
362462306a36Sopenharmony_ci
362562306a36Sopenharmony_ci	/* everything else is valid if they are equal on both sides. */
362662306a36Sopenharmony_ci	return peer;
362762306a36Sopenharmony_ci}
362862306a36Sopenharmony_ci
362962306a36Sopenharmony_cistatic int receive_protocol(struct drbd_connection *connection, struct packet_info *pi)
363062306a36Sopenharmony_ci{
363162306a36Sopenharmony_ci	struct p_protocol *p = pi->data;
363262306a36Sopenharmony_ci	enum drbd_after_sb_p p_after_sb_0p, p_after_sb_1p, p_after_sb_2p;
363362306a36Sopenharmony_ci	int p_proto, p_discard_my_data, p_two_primaries, cf;
363462306a36Sopenharmony_ci	struct net_conf *nc, *old_net_conf, *new_net_conf = NULL;
363562306a36Sopenharmony_ci	char integrity_alg[SHARED_SECRET_MAX] = "";
363662306a36Sopenharmony_ci	struct crypto_shash *peer_integrity_tfm = NULL;
363762306a36Sopenharmony_ci	void *int_dig_in = NULL, *int_dig_vv = NULL;
363862306a36Sopenharmony_ci
363962306a36Sopenharmony_ci	p_proto		= be32_to_cpu(p->protocol);
364062306a36Sopenharmony_ci	p_after_sb_0p	= be32_to_cpu(p->after_sb_0p);
364162306a36Sopenharmony_ci	p_after_sb_1p	= be32_to_cpu(p->after_sb_1p);
364262306a36Sopenharmony_ci	p_after_sb_2p	= be32_to_cpu(p->after_sb_2p);
364362306a36Sopenharmony_ci	p_two_primaries = be32_to_cpu(p->two_primaries);
364462306a36Sopenharmony_ci	cf		= be32_to_cpu(p->conn_flags);
364562306a36Sopenharmony_ci	p_discard_my_data = cf & CF_DISCARD_MY_DATA;
364662306a36Sopenharmony_ci
364762306a36Sopenharmony_ci	if (connection->agreed_pro_version >= 87) {
364862306a36Sopenharmony_ci		int err;
364962306a36Sopenharmony_ci
365062306a36Sopenharmony_ci		if (pi->size > sizeof(integrity_alg))
365162306a36Sopenharmony_ci			return -EIO;
365262306a36Sopenharmony_ci		err = drbd_recv_all(connection, integrity_alg, pi->size);
365362306a36Sopenharmony_ci		if (err)
365462306a36Sopenharmony_ci			return err;
365562306a36Sopenharmony_ci		integrity_alg[SHARED_SECRET_MAX - 1] = 0;
365662306a36Sopenharmony_ci	}
365762306a36Sopenharmony_ci
365862306a36Sopenharmony_ci	if (pi->cmd != P_PROTOCOL_UPDATE) {
365962306a36Sopenharmony_ci		clear_bit(CONN_DRY_RUN, &connection->flags);
366062306a36Sopenharmony_ci
366162306a36Sopenharmony_ci		if (cf & CF_DRY_RUN)
366262306a36Sopenharmony_ci			set_bit(CONN_DRY_RUN, &connection->flags);
366362306a36Sopenharmony_ci
366462306a36Sopenharmony_ci		rcu_read_lock();
366562306a36Sopenharmony_ci		nc = rcu_dereference(connection->net_conf);
366662306a36Sopenharmony_ci
366762306a36Sopenharmony_ci		if (p_proto != nc->wire_protocol) {
366862306a36Sopenharmony_ci			drbd_err(connection, "incompatible %s settings\n", "protocol");
366962306a36Sopenharmony_ci			goto disconnect_rcu_unlock;
367062306a36Sopenharmony_ci		}
367162306a36Sopenharmony_ci
367262306a36Sopenharmony_ci		if (convert_after_sb(p_after_sb_0p) != nc->after_sb_0p) {
367362306a36Sopenharmony_ci			drbd_err(connection, "incompatible %s settings\n", "after-sb-0pri");
367462306a36Sopenharmony_ci			goto disconnect_rcu_unlock;
367562306a36Sopenharmony_ci		}
367662306a36Sopenharmony_ci
367762306a36Sopenharmony_ci		if (convert_after_sb(p_after_sb_1p) != nc->after_sb_1p) {
367862306a36Sopenharmony_ci			drbd_err(connection, "incompatible %s settings\n", "after-sb-1pri");
367962306a36Sopenharmony_ci			goto disconnect_rcu_unlock;
368062306a36Sopenharmony_ci		}
368162306a36Sopenharmony_ci
368262306a36Sopenharmony_ci		if (convert_after_sb(p_after_sb_2p) != nc->after_sb_2p) {
368362306a36Sopenharmony_ci			drbd_err(connection, "incompatible %s settings\n", "after-sb-2pri");
368462306a36Sopenharmony_ci			goto disconnect_rcu_unlock;
368562306a36Sopenharmony_ci		}
368662306a36Sopenharmony_ci
368762306a36Sopenharmony_ci		if (p_discard_my_data && nc->discard_my_data) {
368862306a36Sopenharmony_ci			drbd_err(connection, "incompatible %s settings\n", "discard-my-data");
368962306a36Sopenharmony_ci			goto disconnect_rcu_unlock;
369062306a36Sopenharmony_ci		}
369162306a36Sopenharmony_ci
369262306a36Sopenharmony_ci		if (p_two_primaries != nc->two_primaries) {
369362306a36Sopenharmony_ci			drbd_err(connection, "incompatible %s settings\n", "allow-two-primaries");
369462306a36Sopenharmony_ci			goto disconnect_rcu_unlock;
369562306a36Sopenharmony_ci		}
369662306a36Sopenharmony_ci
369762306a36Sopenharmony_ci		if (strcmp(integrity_alg, nc->integrity_alg)) {
369862306a36Sopenharmony_ci			drbd_err(connection, "incompatible %s settings\n", "data-integrity-alg");
369962306a36Sopenharmony_ci			goto disconnect_rcu_unlock;
370062306a36Sopenharmony_ci		}
370162306a36Sopenharmony_ci
370262306a36Sopenharmony_ci		rcu_read_unlock();
370362306a36Sopenharmony_ci	}
370462306a36Sopenharmony_ci
370562306a36Sopenharmony_ci	if (integrity_alg[0]) {
370662306a36Sopenharmony_ci		int hash_size;
370762306a36Sopenharmony_ci
370862306a36Sopenharmony_ci		/*
370962306a36Sopenharmony_ci		 * We can only change the peer data integrity algorithm
371062306a36Sopenharmony_ci		 * here.  Changing our own data integrity algorithm
371162306a36Sopenharmony_ci		 * requires that we send a P_PROTOCOL_UPDATE packet at
371262306a36Sopenharmony_ci		 * the same time; otherwise, the peer has no way to
371362306a36Sopenharmony_ci		 * tell between which packets the algorithm should
371462306a36Sopenharmony_ci		 * change.
371562306a36Sopenharmony_ci		 */
371662306a36Sopenharmony_ci
371762306a36Sopenharmony_ci		peer_integrity_tfm = crypto_alloc_shash(integrity_alg, 0, 0);
371862306a36Sopenharmony_ci		if (IS_ERR(peer_integrity_tfm)) {
371962306a36Sopenharmony_ci			peer_integrity_tfm = NULL;
372062306a36Sopenharmony_ci			drbd_err(connection, "peer data-integrity-alg %s not supported\n",
372162306a36Sopenharmony_ci				 integrity_alg);
372262306a36Sopenharmony_ci			goto disconnect;
372362306a36Sopenharmony_ci		}
372462306a36Sopenharmony_ci
372562306a36Sopenharmony_ci		hash_size = crypto_shash_digestsize(peer_integrity_tfm);
372662306a36Sopenharmony_ci		int_dig_in = kmalloc(hash_size, GFP_KERNEL);
372762306a36Sopenharmony_ci		int_dig_vv = kmalloc(hash_size, GFP_KERNEL);
372862306a36Sopenharmony_ci		if (!(int_dig_in && int_dig_vv)) {
372962306a36Sopenharmony_ci			drbd_err(connection, "Allocation of buffers for data integrity checking failed\n");
373062306a36Sopenharmony_ci			goto disconnect;
373162306a36Sopenharmony_ci		}
373262306a36Sopenharmony_ci	}
373362306a36Sopenharmony_ci
373462306a36Sopenharmony_ci	new_net_conf = kmalloc(sizeof(struct net_conf), GFP_KERNEL);
373562306a36Sopenharmony_ci	if (!new_net_conf)
373662306a36Sopenharmony_ci		goto disconnect;
373762306a36Sopenharmony_ci
373862306a36Sopenharmony_ci	mutex_lock(&connection->data.mutex);
373962306a36Sopenharmony_ci	mutex_lock(&connection->resource->conf_update);
374062306a36Sopenharmony_ci	old_net_conf = connection->net_conf;
374162306a36Sopenharmony_ci	*new_net_conf = *old_net_conf;
374262306a36Sopenharmony_ci
374362306a36Sopenharmony_ci	new_net_conf->wire_protocol = p_proto;
374462306a36Sopenharmony_ci	new_net_conf->after_sb_0p = convert_after_sb(p_after_sb_0p);
374562306a36Sopenharmony_ci	new_net_conf->after_sb_1p = convert_after_sb(p_after_sb_1p);
374662306a36Sopenharmony_ci	new_net_conf->after_sb_2p = convert_after_sb(p_after_sb_2p);
374762306a36Sopenharmony_ci	new_net_conf->two_primaries = p_two_primaries;
374862306a36Sopenharmony_ci
374962306a36Sopenharmony_ci	rcu_assign_pointer(connection->net_conf, new_net_conf);
375062306a36Sopenharmony_ci	mutex_unlock(&connection->resource->conf_update);
375162306a36Sopenharmony_ci	mutex_unlock(&connection->data.mutex);
375262306a36Sopenharmony_ci
375362306a36Sopenharmony_ci	crypto_free_shash(connection->peer_integrity_tfm);
375462306a36Sopenharmony_ci	kfree(connection->int_dig_in);
375562306a36Sopenharmony_ci	kfree(connection->int_dig_vv);
375662306a36Sopenharmony_ci	connection->peer_integrity_tfm = peer_integrity_tfm;
375762306a36Sopenharmony_ci	connection->int_dig_in = int_dig_in;
375862306a36Sopenharmony_ci	connection->int_dig_vv = int_dig_vv;
375962306a36Sopenharmony_ci
376062306a36Sopenharmony_ci	if (strcmp(old_net_conf->integrity_alg, integrity_alg))
376162306a36Sopenharmony_ci		drbd_info(connection, "peer data-integrity-alg: %s\n",
376262306a36Sopenharmony_ci			  integrity_alg[0] ? integrity_alg : "(none)");
376362306a36Sopenharmony_ci
376462306a36Sopenharmony_ci	kvfree_rcu_mightsleep(old_net_conf);
376562306a36Sopenharmony_ci	return 0;
376662306a36Sopenharmony_ci
376762306a36Sopenharmony_cidisconnect_rcu_unlock:
376862306a36Sopenharmony_ci	rcu_read_unlock();
376962306a36Sopenharmony_cidisconnect:
377062306a36Sopenharmony_ci	crypto_free_shash(peer_integrity_tfm);
377162306a36Sopenharmony_ci	kfree(int_dig_in);
377262306a36Sopenharmony_ci	kfree(int_dig_vv);
377362306a36Sopenharmony_ci	conn_request_state(connection, NS(conn, C_DISCONNECTING), CS_HARD);
377462306a36Sopenharmony_ci	return -EIO;
377562306a36Sopenharmony_ci}
377662306a36Sopenharmony_ci
377762306a36Sopenharmony_ci/* helper function
377862306a36Sopenharmony_ci * input: alg name, feature name
377962306a36Sopenharmony_ci * return: NULL (alg name was "")
378062306a36Sopenharmony_ci *         ERR_PTR(error) if something goes wrong
378162306a36Sopenharmony_ci *         or the crypto hash ptr, if it worked out ok. */
378262306a36Sopenharmony_cistatic struct crypto_shash *drbd_crypto_alloc_digest_safe(
378362306a36Sopenharmony_ci		const struct drbd_device *device,
378462306a36Sopenharmony_ci		const char *alg, const char *name)
378562306a36Sopenharmony_ci{
378662306a36Sopenharmony_ci	struct crypto_shash *tfm;
378762306a36Sopenharmony_ci
378862306a36Sopenharmony_ci	if (!alg[0])
378962306a36Sopenharmony_ci		return NULL;
379062306a36Sopenharmony_ci
379162306a36Sopenharmony_ci	tfm = crypto_alloc_shash(alg, 0, 0);
379262306a36Sopenharmony_ci	if (IS_ERR(tfm)) {
379362306a36Sopenharmony_ci		drbd_err(device, "Can not allocate \"%s\" as %s (reason: %ld)\n",
379462306a36Sopenharmony_ci			alg, name, PTR_ERR(tfm));
379562306a36Sopenharmony_ci		return tfm;
379662306a36Sopenharmony_ci	}
379762306a36Sopenharmony_ci	return tfm;
379862306a36Sopenharmony_ci}
379962306a36Sopenharmony_ci
380062306a36Sopenharmony_cistatic int ignore_remaining_packet(struct drbd_connection *connection, struct packet_info *pi)
380162306a36Sopenharmony_ci{
380262306a36Sopenharmony_ci	void *buffer = connection->data.rbuf;
380362306a36Sopenharmony_ci	int size = pi->size;
380462306a36Sopenharmony_ci
380562306a36Sopenharmony_ci	while (size) {
380662306a36Sopenharmony_ci		int s = min_t(int, size, DRBD_SOCKET_BUFFER_SIZE);
380762306a36Sopenharmony_ci		s = drbd_recv(connection, buffer, s);
380862306a36Sopenharmony_ci		if (s <= 0) {
380962306a36Sopenharmony_ci			if (s < 0)
381062306a36Sopenharmony_ci				return s;
381162306a36Sopenharmony_ci			break;
381262306a36Sopenharmony_ci		}
381362306a36Sopenharmony_ci		size -= s;
381462306a36Sopenharmony_ci	}
381562306a36Sopenharmony_ci	if (size)
381662306a36Sopenharmony_ci		return -EIO;
381762306a36Sopenharmony_ci	return 0;
381862306a36Sopenharmony_ci}
381962306a36Sopenharmony_ci
382062306a36Sopenharmony_ci/*
382162306a36Sopenharmony_ci * config_unknown_volume  -  device configuration command for unknown volume
382262306a36Sopenharmony_ci *
382362306a36Sopenharmony_ci * When a device is added to an existing connection, the node on which the
382462306a36Sopenharmony_ci * device is added first will send configuration commands to its peer but the
382562306a36Sopenharmony_ci * peer will not know about the device yet.  It will warn and ignore these
382662306a36Sopenharmony_ci * commands.  Once the device is added on the second node, the second node will
382762306a36Sopenharmony_ci * send the same device configuration commands, but in the other direction.
382862306a36Sopenharmony_ci *
382962306a36Sopenharmony_ci * (We can also end up here if drbd is misconfigured.)
383062306a36Sopenharmony_ci */
383162306a36Sopenharmony_cistatic int config_unknown_volume(struct drbd_connection *connection, struct packet_info *pi)
383262306a36Sopenharmony_ci{
383362306a36Sopenharmony_ci	drbd_warn(connection, "%s packet received for volume %u, which is not configured locally\n",
383462306a36Sopenharmony_ci		  cmdname(pi->cmd), pi->vnr);
383562306a36Sopenharmony_ci	return ignore_remaining_packet(connection, pi);
383662306a36Sopenharmony_ci}
383762306a36Sopenharmony_ci
383862306a36Sopenharmony_cistatic int receive_SyncParam(struct drbd_connection *connection, struct packet_info *pi)
383962306a36Sopenharmony_ci{
384062306a36Sopenharmony_ci	struct drbd_peer_device *peer_device;
384162306a36Sopenharmony_ci	struct drbd_device *device;
384262306a36Sopenharmony_ci	struct p_rs_param_95 *p;
384362306a36Sopenharmony_ci	unsigned int header_size, data_size, exp_max_sz;
384462306a36Sopenharmony_ci	struct crypto_shash *verify_tfm = NULL;
384562306a36Sopenharmony_ci	struct crypto_shash *csums_tfm = NULL;
384662306a36Sopenharmony_ci	struct net_conf *old_net_conf, *new_net_conf = NULL;
384762306a36Sopenharmony_ci	struct disk_conf *old_disk_conf = NULL, *new_disk_conf = NULL;
384862306a36Sopenharmony_ci	const int apv = connection->agreed_pro_version;
384962306a36Sopenharmony_ci	struct fifo_buffer *old_plan = NULL, *new_plan = NULL;
385062306a36Sopenharmony_ci	unsigned int fifo_size = 0;
385162306a36Sopenharmony_ci	int err;
385262306a36Sopenharmony_ci
385362306a36Sopenharmony_ci	peer_device = conn_peer_device(connection, pi->vnr);
385462306a36Sopenharmony_ci	if (!peer_device)
385562306a36Sopenharmony_ci		return config_unknown_volume(connection, pi);
385662306a36Sopenharmony_ci	device = peer_device->device;
385762306a36Sopenharmony_ci
385862306a36Sopenharmony_ci	exp_max_sz  = apv <= 87 ? sizeof(struct p_rs_param)
385962306a36Sopenharmony_ci		    : apv == 88 ? sizeof(struct p_rs_param)
386062306a36Sopenharmony_ci					+ SHARED_SECRET_MAX
386162306a36Sopenharmony_ci		    : apv <= 94 ? sizeof(struct p_rs_param_89)
386262306a36Sopenharmony_ci		    : /* apv >= 95 */ sizeof(struct p_rs_param_95);
386362306a36Sopenharmony_ci
386462306a36Sopenharmony_ci	if (pi->size > exp_max_sz) {
386562306a36Sopenharmony_ci		drbd_err(device, "SyncParam packet too long: received %u, expected <= %u bytes\n",
386662306a36Sopenharmony_ci		    pi->size, exp_max_sz);
386762306a36Sopenharmony_ci		return -EIO;
386862306a36Sopenharmony_ci	}
386962306a36Sopenharmony_ci
387062306a36Sopenharmony_ci	if (apv <= 88) {
387162306a36Sopenharmony_ci		header_size = sizeof(struct p_rs_param);
387262306a36Sopenharmony_ci		data_size = pi->size - header_size;
387362306a36Sopenharmony_ci	} else if (apv <= 94) {
387462306a36Sopenharmony_ci		header_size = sizeof(struct p_rs_param_89);
387562306a36Sopenharmony_ci		data_size = pi->size - header_size;
387662306a36Sopenharmony_ci		D_ASSERT(device, data_size == 0);
387762306a36Sopenharmony_ci	} else {
387862306a36Sopenharmony_ci		header_size = sizeof(struct p_rs_param_95);
387962306a36Sopenharmony_ci		data_size = pi->size - header_size;
388062306a36Sopenharmony_ci		D_ASSERT(device, data_size == 0);
388162306a36Sopenharmony_ci	}
388262306a36Sopenharmony_ci
388362306a36Sopenharmony_ci	/* initialize verify_alg and csums_alg */
388462306a36Sopenharmony_ci	p = pi->data;
388562306a36Sopenharmony_ci	BUILD_BUG_ON(sizeof(p->algs) != 2 * SHARED_SECRET_MAX);
388662306a36Sopenharmony_ci	memset(&p->algs, 0, sizeof(p->algs));
388762306a36Sopenharmony_ci
388862306a36Sopenharmony_ci	err = drbd_recv_all(peer_device->connection, p, header_size);
388962306a36Sopenharmony_ci	if (err)
389062306a36Sopenharmony_ci		return err;
389162306a36Sopenharmony_ci
389262306a36Sopenharmony_ci	mutex_lock(&connection->resource->conf_update);
389362306a36Sopenharmony_ci	old_net_conf = peer_device->connection->net_conf;
389462306a36Sopenharmony_ci	if (get_ldev(device)) {
389562306a36Sopenharmony_ci		new_disk_conf = kzalloc(sizeof(struct disk_conf), GFP_KERNEL);
389662306a36Sopenharmony_ci		if (!new_disk_conf) {
389762306a36Sopenharmony_ci			put_ldev(device);
389862306a36Sopenharmony_ci			mutex_unlock(&connection->resource->conf_update);
389962306a36Sopenharmony_ci			drbd_err(device, "Allocation of new disk_conf failed\n");
390062306a36Sopenharmony_ci			return -ENOMEM;
390162306a36Sopenharmony_ci		}
390262306a36Sopenharmony_ci
390362306a36Sopenharmony_ci		old_disk_conf = device->ldev->disk_conf;
390462306a36Sopenharmony_ci		*new_disk_conf = *old_disk_conf;
390562306a36Sopenharmony_ci
390662306a36Sopenharmony_ci		new_disk_conf->resync_rate = be32_to_cpu(p->resync_rate);
390762306a36Sopenharmony_ci	}
390862306a36Sopenharmony_ci
390962306a36Sopenharmony_ci	if (apv >= 88) {
391062306a36Sopenharmony_ci		if (apv == 88) {
391162306a36Sopenharmony_ci			if (data_size > SHARED_SECRET_MAX || data_size == 0) {
391262306a36Sopenharmony_ci				drbd_err(device, "verify-alg of wrong size, "
391362306a36Sopenharmony_ci					"peer wants %u, accepting only up to %u byte\n",
391462306a36Sopenharmony_ci					data_size, SHARED_SECRET_MAX);
391562306a36Sopenharmony_ci				goto reconnect;
391662306a36Sopenharmony_ci			}
391762306a36Sopenharmony_ci
391862306a36Sopenharmony_ci			err = drbd_recv_all(peer_device->connection, p->verify_alg, data_size);
391962306a36Sopenharmony_ci			if (err)
392062306a36Sopenharmony_ci				goto reconnect;
392162306a36Sopenharmony_ci			/* we expect NUL terminated string */
392262306a36Sopenharmony_ci			/* but just in case someone tries to be evil */
392362306a36Sopenharmony_ci			D_ASSERT(device, p->verify_alg[data_size-1] == 0);
392462306a36Sopenharmony_ci			p->verify_alg[data_size-1] = 0;
392562306a36Sopenharmony_ci
392662306a36Sopenharmony_ci		} else /* apv >= 89 */ {
392762306a36Sopenharmony_ci			/* we still expect NUL terminated strings */
392862306a36Sopenharmony_ci			/* but just in case someone tries to be evil */
392962306a36Sopenharmony_ci			D_ASSERT(device, p->verify_alg[SHARED_SECRET_MAX-1] == 0);
393062306a36Sopenharmony_ci			D_ASSERT(device, p->csums_alg[SHARED_SECRET_MAX-1] == 0);
393162306a36Sopenharmony_ci			p->verify_alg[SHARED_SECRET_MAX-1] = 0;
393262306a36Sopenharmony_ci			p->csums_alg[SHARED_SECRET_MAX-1] = 0;
393362306a36Sopenharmony_ci		}
393462306a36Sopenharmony_ci
393562306a36Sopenharmony_ci		if (strcmp(old_net_conf->verify_alg, p->verify_alg)) {
393662306a36Sopenharmony_ci			if (device->state.conn == C_WF_REPORT_PARAMS) {
393762306a36Sopenharmony_ci				drbd_err(device, "Different verify-alg settings. me=\"%s\" peer=\"%s\"\n",
393862306a36Sopenharmony_ci				    old_net_conf->verify_alg, p->verify_alg);
393962306a36Sopenharmony_ci				goto disconnect;
394062306a36Sopenharmony_ci			}
394162306a36Sopenharmony_ci			verify_tfm = drbd_crypto_alloc_digest_safe(device,
394262306a36Sopenharmony_ci					p->verify_alg, "verify-alg");
394362306a36Sopenharmony_ci			if (IS_ERR(verify_tfm)) {
394462306a36Sopenharmony_ci				verify_tfm = NULL;
394562306a36Sopenharmony_ci				goto disconnect;
394662306a36Sopenharmony_ci			}
394762306a36Sopenharmony_ci		}
394862306a36Sopenharmony_ci
394962306a36Sopenharmony_ci		if (apv >= 89 && strcmp(old_net_conf->csums_alg, p->csums_alg)) {
395062306a36Sopenharmony_ci			if (device->state.conn == C_WF_REPORT_PARAMS) {
395162306a36Sopenharmony_ci				drbd_err(device, "Different csums-alg settings. me=\"%s\" peer=\"%s\"\n",
395262306a36Sopenharmony_ci				    old_net_conf->csums_alg, p->csums_alg);
395362306a36Sopenharmony_ci				goto disconnect;
395462306a36Sopenharmony_ci			}
395562306a36Sopenharmony_ci			csums_tfm = drbd_crypto_alloc_digest_safe(device,
395662306a36Sopenharmony_ci					p->csums_alg, "csums-alg");
395762306a36Sopenharmony_ci			if (IS_ERR(csums_tfm)) {
395862306a36Sopenharmony_ci				csums_tfm = NULL;
395962306a36Sopenharmony_ci				goto disconnect;
396062306a36Sopenharmony_ci			}
396162306a36Sopenharmony_ci		}
396262306a36Sopenharmony_ci
396362306a36Sopenharmony_ci		if (apv > 94 && new_disk_conf) {
396462306a36Sopenharmony_ci			new_disk_conf->c_plan_ahead = be32_to_cpu(p->c_plan_ahead);
396562306a36Sopenharmony_ci			new_disk_conf->c_delay_target = be32_to_cpu(p->c_delay_target);
396662306a36Sopenharmony_ci			new_disk_conf->c_fill_target = be32_to_cpu(p->c_fill_target);
396762306a36Sopenharmony_ci			new_disk_conf->c_max_rate = be32_to_cpu(p->c_max_rate);
396862306a36Sopenharmony_ci
396962306a36Sopenharmony_ci			fifo_size = (new_disk_conf->c_plan_ahead * 10 * SLEEP_TIME) / HZ;
397062306a36Sopenharmony_ci			if (fifo_size != device->rs_plan_s->size) {
397162306a36Sopenharmony_ci				new_plan = fifo_alloc(fifo_size);
397262306a36Sopenharmony_ci				if (!new_plan) {
397362306a36Sopenharmony_ci					drbd_err(device, "kmalloc of fifo_buffer failed");
397462306a36Sopenharmony_ci					put_ldev(device);
397562306a36Sopenharmony_ci					goto disconnect;
397662306a36Sopenharmony_ci				}
397762306a36Sopenharmony_ci			}
397862306a36Sopenharmony_ci		}
397962306a36Sopenharmony_ci
398062306a36Sopenharmony_ci		if (verify_tfm || csums_tfm) {
398162306a36Sopenharmony_ci			new_net_conf = kzalloc(sizeof(struct net_conf), GFP_KERNEL);
398262306a36Sopenharmony_ci			if (!new_net_conf)
398362306a36Sopenharmony_ci				goto disconnect;
398462306a36Sopenharmony_ci
398562306a36Sopenharmony_ci			*new_net_conf = *old_net_conf;
398662306a36Sopenharmony_ci
398762306a36Sopenharmony_ci			if (verify_tfm) {
398862306a36Sopenharmony_ci				strcpy(new_net_conf->verify_alg, p->verify_alg);
398962306a36Sopenharmony_ci				new_net_conf->verify_alg_len = strlen(p->verify_alg) + 1;
399062306a36Sopenharmony_ci				crypto_free_shash(peer_device->connection->verify_tfm);
399162306a36Sopenharmony_ci				peer_device->connection->verify_tfm = verify_tfm;
399262306a36Sopenharmony_ci				drbd_info(device, "using verify-alg: \"%s\"\n", p->verify_alg);
399362306a36Sopenharmony_ci			}
399462306a36Sopenharmony_ci			if (csums_tfm) {
399562306a36Sopenharmony_ci				strcpy(new_net_conf->csums_alg, p->csums_alg);
399662306a36Sopenharmony_ci				new_net_conf->csums_alg_len = strlen(p->csums_alg) + 1;
399762306a36Sopenharmony_ci				crypto_free_shash(peer_device->connection->csums_tfm);
399862306a36Sopenharmony_ci				peer_device->connection->csums_tfm = csums_tfm;
399962306a36Sopenharmony_ci				drbd_info(device, "using csums-alg: \"%s\"\n", p->csums_alg);
400062306a36Sopenharmony_ci			}
400162306a36Sopenharmony_ci			rcu_assign_pointer(connection->net_conf, new_net_conf);
400262306a36Sopenharmony_ci		}
400362306a36Sopenharmony_ci	}
400462306a36Sopenharmony_ci
400562306a36Sopenharmony_ci	if (new_disk_conf) {
400662306a36Sopenharmony_ci		rcu_assign_pointer(device->ldev->disk_conf, new_disk_conf);
400762306a36Sopenharmony_ci		put_ldev(device);
400862306a36Sopenharmony_ci	}
400962306a36Sopenharmony_ci
401062306a36Sopenharmony_ci	if (new_plan) {
401162306a36Sopenharmony_ci		old_plan = device->rs_plan_s;
401262306a36Sopenharmony_ci		rcu_assign_pointer(device->rs_plan_s, new_plan);
401362306a36Sopenharmony_ci	}
401462306a36Sopenharmony_ci
401562306a36Sopenharmony_ci	mutex_unlock(&connection->resource->conf_update);
401662306a36Sopenharmony_ci	synchronize_rcu();
401762306a36Sopenharmony_ci	if (new_net_conf)
401862306a36Sopenharmony_ci		kfree(old_net_conf);
401962306a36Sopenharmony_ci	kfree(old_disk_conf);
402062306a36Sopenharmony_ci	kfree(old_plan);
402162306a36Sopenharmony_ci
402262306a36Sopenharmony_ci	return 0;
402362306a36Sopenharmony_ci
402462306a36Sopenharmony_cireconnect:
402562306a36Sopenharmony_ci	if (new_disk_conf) {
402662306a36Sopenharmony_ci		put_ldev(device);
402762306a36Sopenharmony_ci		kfree(new_disk_conf);
402862306a36Sopenharmony_ci	}
402962306a36Sopenharmony_ci	mutex_unlock(&connection->resource->conf_update);
403062306a36Sopenharmony_ci	return -EIO;
403162306a36Sopenharmony_ci
403262306a36Sopenharmony_cidisconnect:
403362306a36Sopenharmony_ci	kfree(new_plan);
403462306a36Sopenharmony_ci	if (new_disk_conf) {
403562306a36Sopenharmony_ci		put_ldev(device);
403662306a36Sopenharmony_ci		kfree(new_disk_conf);
403762306a36Sopenharmony_ci	}
403862306a36Sopenharmony_ci	mutex_unlock(&connection->resource->conf_update);
403962306a36Sopenharmony_ci	/* just for completeness: actually not needed,
404062306a36Sopenharmony_ci	 * as this is not reached if csums_tfm was ok. */
404162306a36Sopenharmony_ci	crypto_free_shash(csums_tfm);
404262306a36Sopenharmony_ci	/* but free the verify_tfm again, if csums_tfm did not work out */
404362306a36Sopenharmony_ci	crypto_free_shash(verify_tfm);
404462306a36Sopenharmony_ci	conn_request_state(peer_device->connection, NS(conn, C_DISCONNECTING), CS_HARD);
404562306a36Sopenharmony_ci	return -EIO;
404662306a36Sopenharmony_ci}
404762306a36Sopenharmony_ci
404862306a36Sopenharmony_ci/* warn if the arguments differ by more than 12.5% */
404962306a36Sopenharmony_cistatic void warn_if_differ_considerably(struct drbd_device *device,
405062306a36Sopenharmony_ci	const char *s, sector_t a, sector_t b)
405162306a36Sopenharmony_ci{
405262306a36Sopenharmony_ci	sector_t d;
405362306a36Sopenharmony_ci	if (a == 0 || b == 0)
405462306a36Sopenharmony_ci		return;
405562306a36Sopenharmony_ci	d = (a > b) ? (a - b) : (b - a);
405662306a36Sopenharmony_ci	if (d > (a>>3) || d > (b>>3))
405762306a36Sopenharmony_ci		drbd_warn(device, "Considerable difference in %s: %llus vs. %llus\n", s,
405862306a36Sopenharmony_ci		     (unsigned long long)a, (unsigned long long)b);
405962306a36Sopenharmony_ci}
406062306a36Sopenharmony_ci
406162306a36Sopenharmony_cistatic int receive_sizes(struct drbd_connection *connection, struct packet_info *pi)
406262306a36Sopenharmony_ci{
406362306a36Sopenharmony_ci	struct drbd_peer_device *peer_device;
406462306a36Sopenharmony_ci	struct drbd_device *device;
406562306a36Sopenharmony_ci	struct p_sizes *p = pi->data;
406662306a36Sopenharmony_ci	struct o_qlim *o = (connection->agreed_features & DRBD_FF_WSAME) ? p->qlim : NULL;
406762306a36Sopenharmony_ci	enum determine_dev_size dd = DS_UNCHANGED;
406862306a36Sopenharmony_ci	sector_t p_size, p_usize, p_csize, my_usize;
406962306a36Sopenharmony_ci	sector_t new_size, cur_size;
407062306a36Sopenharmony_ci	int ldsc = 0; /* local disk size changed */
407162306a36Sopenharmony_ci	enum dds_flags ddsf;
407262306a36Sopenharmony_ci
407362306a36Sopenharmony_ci	peer_device = conn_peer_device(connection, pi->vnr);
407462306a36Sopenharmony_ci	if (!peer_device)
407562306a36Sopenharmony_ci		return config_unknown_volume(connection, pi);
407662306a36Sopenharmony_ci	device = peer_device->device;
407762306a36Sopenharmony_ci	cur_size = get_capacity(device->vdisk);
407862306a36Sopenharmony_ci
407962306a36Sopenharmony_ci	p_size = be64_to_cpu(p->d_size);
408062306a36Sopenharmony_ci	p_usize = be64_to_cpu(p->u_size);
408162306a36Sopenharmony_ci	p_csize = be64_to_cpu(p->c_size);
408262306a36Sopenharmony_ci
408362306a36Sopenharmony_ci	/* just store the peer's disk size for now.
408462306a36Sopenharmony_ci	 * we still need to figure out whether we accept that. */
408562306a36Sopenharmony_ci	device->p_size = p_size;
408662306a36Sopenharmony_ci
408762306a36Sopenharmony_ci	if (get_ldev(device)) {
408862306a36Sopenharmony_ci		rcu_read_lock();
408962306a36Sopenharmony_ci		my_usize = rcu_dereference(device->ldev->disk_conf)->disk_size;
409062306a36Sopenharmony_ci		rcu_read_unlock();
409162306a36Sopenharmony_ci
409262306a36Sopenharmony_ci		warn_if_differ_considerably(device, "lower level device sizes",
409362306a36Sopenharmony_ci			   p_size, drbd_get_max_capacity(device->ldev));
409462306a36Sopenharmony_ci		warn_if_differ_considerably(device, "user requested size",
409562306a36Sopenharmony_ci					    p_usize, my_usize);
409662306a36Sopenharmony_ci
409762306a36Sopenharmony_ci		/* if this is the first connect, or an otherwise expected
409862306a36Sopenharmony_ci		 * param exchange, choose the minimum */
409962306a36Sopenharmony_ci		if (device->state.conn == C_WF_REPORT_PARAMS)
410062306a36Sopenharmony_ci			p_usize = min_not_zero(my_usize, p_usize);
410162306a36Sopenharmony_ci
410262306a36Sopenharmony_ci		/* Never shrink a device with usable data during connect,
410362306a36Sopenharmony_ci		 * or "attach" on the peer.
410462306a36Sopenharmony_ci		 * But allow online shrinking if we are connected. */
410562306a36Sopenharmony_ci		new_size = drbd_new_dev_size(device, device->ldev, p_usize, 0);
410662306a36Sopenharmony_ci		if (new_size < cur_size &&
410762306a36Sopenharmony_ci		    device->state.disk >= D_OUTDATED &&
410862306a36Sopenharmony_ci		    (device->state.conn < C_CONNECTED || device->state.pdsk == D_DISKLESS)) {
410962306a36Sopenharmony_ci			drbd_err(device, "The peer's disk size is too small! (%llu < %llu sectors)\n",
411062306a36Sopenharmony_ci					(unsigned long long)new_size, (unsigned long long)cur_size);
411162306a36Sopenharmony_ci			conn_request_state(peer_device->connection, NS(conn, C_DISCONNECTING), CS_HARD);
411262306a36Sopenharmony_ci			put_ldev(device);
411362306a36Sopenharmony_ci			return -EIO;
411462306a36Sopenharmony_ci		}
411562306a36Sopenharmony_ci
411662306a36Sopenharmony_ci		if (my_usize != p_usize) {
411762306a36Sopenharmony_ci			struct disk_conf *old_disk_conf, *new_disk_conf = NULL;
411862306a36Sopenharmony_ci
411962306a36Sopenharmony_ci			new_disk_conf = kzalloc(sizeof(struct disk_conf), GFP_KERNEL);
412062306a36Sopenharmony_ci			if (!new_disk_conf) {
412162306a36Sopenharmony_ci				put_ldev(device);
412262306a36Sopenharmony_ci				return -ENOMEM;
412362306a36Sopenharmony_ci			}
412462306a36Sopenharmony_ci
412562306a36Sopenharmony_ci			mutex_lock(&connection->resource->conf_update);
412662306a36Sopenharmony_ci			old_disk_conf = device->ldev->disk_conf;
412762306a36Sopenharmony_ci			*new_disk_conf = *old_disk_conf;
412862306a36Sopenharmony_ci			new_disk_conf->disk_size = p_usize;
412962306a36Sopenharmony_ci
413062306a36Sopenharmony_ci			rcu_assign_pointer(device->ldev->disk_conf, new_disk_conf);
413162306a36Sopenharmony_ci			mutex_unlock(&connection->resource->conf_update);
413262306a36Sopenharmony_ci			kvfree_rcu_mightsleep(old_disk_conf);
413362306a36Sopenharmony_ci
413462306a36Sopenharmony_ci			drbd_info(device, "Peer sets u_size to %lu sectors (old: %lu)\n",
413562306a36Sopenharmony_ci				 (unsigned long)p_usize, (unsigned long)my_usize);
413662306a36Sopenharmony_ci		}
413762306a36Sopenharmony_ci
413862306a36Sopenharmony_ci		put_ldev(device);
413962306a36Sopenharmony_ci	}
414062306a36Sopenharmony_ci
414162306a36Sopenharmony_ci	device->peer_max_bio_size = be32_to_cpu(p->max_bio_size);
414262306a36Sopenharmony_ci	/* Leave drbd_reconsider_queue_parameters() before drbd_determine_dev_size().
414362306a36Sopenharmony_ci	   In case we cleared the QUEUE_FLAG_DISCARD from our queue in
414462306a36Sopenharmony_ci	   drbd_reconsider_queue_parameters(), we can be sure that after
414562306a36Sopenharmony_ci	   drbd_determine_dev_size() no REQ_DISCARDs are in the queue. */
414662306a36Sopenharmony_ci
414762306a36Sopenharmony_ci	ddsf = be16_to_cpu(p->dds_flags);
414862306a36Sopenharmony_ci	if (get_ldev(device)) {
414962306a36Sopenharmony_ci		drbd_reconsider_queue_parameters(device, device->ldev, o);
415062306a36Sopenharmony_ci		dd = drbd_determine_dev_size(device, ddsf, NULL);
415162306a36Sopenharmony_ci		put_ldev(device);
415262306a36Sopenharmony_ci		if (dd == DS_ERROR)
415362306a36Sopenharmony_ci			return -EIO;
415462306a36Sopenharmony_ci		drbd_md_sync(device);
415562306a36Sopenharmony_ci	} else {
415662306a36Sopenharmony_ci		/*
415762306a36Sopenharmony_ci		 * I am diskless, need to accept the peer's *current* size.
415862306a36Sopenharmony_ci		 * I must NOT accept the peers backing disk size,
415962306a36Sopenharmony_ci		 * it may have been larger than mine all along...
416062306a36Sopenharmony_ci		 *
416162306a36Sopenharmony_ci		 * At this point, the peer knows more about my disk, or at
416262306a36Sopenharmony_ci		 * least about what we last agreed upon, than myself.
416362306a36Sopenharmony_ci		 * So if his c_size is less than his d_size, the most likely
416462306a36Sopenharmony_ci		 * reason is that *my* d_size was smaller last time we checked.
416562306a36Sopenharmony_ci		 *
416662306a36Sopenharmony_ci		 * However, if he sends a zero current size,
416762306a36Sopenharmony_ci		 * take his (user-capped or) backing disk size anyways.
416862306a36Sopenharmony_ci		 *
416962306a36Sopenharmony_ci		 * Unless of course he does not have a disk himself.
417062306a36Sopenharmony_ci		 * In which case we ignore this completely.
417162306a36Sopenharmony_ci		 */
417262306a36Sopenharmony_ci		sector_t new_size = p_csize ?: p_usize ?: p_size;
417362306a36Sopenharmony_ci		drbd_reconsider_queue_parameters(device, NULL, o);
417462306a36Sopenharmony_ci		if (new_size == 0) {
417562306a36Sopenharmony_ci			/* Ignore, peer does not know nothing. */
417662306a36Sopenharmony_ci		} else if (new_size == cur_size) {
417762306a36Sopenharmony_ci			/* nothing to do */
417862306a36Sopenharmony_ci		} else if (cur_size != 0 && p_size == 0) {
417962306a36Sopenharmony_ci			drbd_warn(device, "Ignored diskless peer device size (peer:%llu != me:%llu sectors)!\n",
418062306a36Sopenharmony_ci					(unsigned long long)new_size, (unsigned long long)cur_size);
418162306a36Sopenharmony_ci		} else if (new_size < cur_size && device->state.role == R_PRIMARY) {
418262306a36Sopenharmony_ci			drbd_err(device, "The peer's device size is too small! (%llu < %llu sectors); demote me first!\n",
418362306a36Sopenharmony_ci					(unsigned long long)new_size, (unsigned long long)cur_size);
418462306a36Sopenharmony_ci			conn_request_state(peer_device->connection, NS(conn, C_DISCONNECTING), CS_HARD);
418562306a36Sopenharmony_ci			return -EIO;
418662306a36Sopenharmony_ci		} else {
418762306a36Sopenharmony_ci			/* I believe the peer, if
418862306a36Sopenharmony_ci			 *  - I don't have a current size myself
418962306a36Sopenharmony_ci			 *  - we agree on the size anyways
419062306a36Sopenharmony_ci			 *  - I do have a current size, am Secondary,
419162306a36Sopenharmony_ci			 *    and he has the only disk
419262306a36Sopenharmony_ci			 *  - I do have a current size, am Primary,
419362306a36Sopenharmony_ci			 *    and he has the only disk,
419462306a36Sopenharmony_ci			 *    which is larger than my current size
419562306a36Sopenharmony_ci			 */
419662306a36Sopenharmony_ci			drbd_set_my_capacity(device, new_size);
419762306a36Sopenharmony_ci		}
419862306a36Sopenharmony_ci	}
419962306a36Sopenharmony_ci
420062306a36Sopenharmony_ci	if (get_ldev(device)) {
420162306a36Sopenharmony_ci		if (device->ldev->known_size != drbd_get_capacity(device->ldev->backing_bdev)) {
420262306a36Sopenharmony_ci			device->ldev->known_size = drbd_get_capacity(device->ldev->backing_bdev);
420362306a36Sopenharmony_ci			ldsc = 1;
420462306a36Sopenharmony_ci		}
420562306a36Sopenharmony_ci
420662306a36Sopenharmony_ci		put_ldev(device);
420762306a36Sopenharmony_ci	}
420862306a36Sopenharmony_ci
420962306a36Sopenharmony_ci	if (device->state.conn > C_WF_REPORT_PARAMS) {
421062306a36Sopenharmony_ci		if (be64_to_cpu(p->c_size) != get_capacity(device->vdisk) ||
421162306a36Sopenharmony_ci		    ldsc) {
421262306a36Sopenharmony_ci			/* we have different sizes, probably peer
421362306a36Sopenharmony_ci			 * needs to know my new size... */
421462306a36Sopenharmony_ci			drbd_send_sizes(peer_device, 0, ddsf);
421562306a36Sopenharmony_ci		}
421662306a36Sopenharmony_ci		if (test_and_clear_bit(RESIZE_PENDING, &device->flags) ||
421762306a36Sopenharmony_ci		    (dd == DS_GREW && device->state.conn == C_CONNECTED)) {
421862306a36Sopenharmony_ci			if (device->state.pdsk >= D_INCONSISTENT &&
421962306a36Sopenharmony_ci			    device->state.disk >= D_INCONSISTENT) {
422062306a36Sopenharmony_ci				if (ddsf & DDSF_NO_RESYNC)
422162306a36Sopenharmony_ci					drbd_info(device, "Resync of new storage suppressed with --assume-clean\n");
422262306a36Sopenharmony_ci				else
422362306a36Sopenharmony_ci					resync_after_online_grow(device);
422462306a36Sopenharmony_ci			} else
422562306a36Sopenharmony_ci				set_bit(RESYNC_AFTER_NEG, &device->flags);
422662306a36Sopenharmony_ci		}
422762306a36Sopenharmony_ci	}
422862306a36Sopenharmony_ci
422962306a36Sopenharmony_ci	return 0;
423062306a36Sopenharmony_ci}
423162306a36Sopenharmony_ci
423262306a36Sopenharmony_cistatic int receive_uuids(struct drbd_connection *connection, struct packet_info *pi)
423362306a36Sopenharmony_ci{
423462306a36Sopenharmony_ci	struct drbd_peer_device *peer_device;
423562306a36Sopenharmony_ci	struct drbd_device *device;
423662306a36Sopenharmony_ci	struct p_uuids *p = pi->data;
423762306a36Sopenharmony_ci	u64 *p_uuid;
423862306a36Sopenharmony_ci	int i, updated_uuids = 0;
423962306a36Sopenharmony_ci
424062306a36Sopenharmony_ci	peer_device = conn_peer_device(connection, pi->vnr);
424162306a36Sopenharmony_ci	if (!peer_device)
424262306a36Sopenharmony_ci		return config_unknown_volume(connection, pi);
424362306a36Sopenharmony_ci	device = peer_device->device;
424462306a36Sopenharmony_ci
424562306a36Sopenharmony_ci	p_uuid = kmalloc_array(UI_EXTENDED_SIZE, sizeof(*p_uuid), GFP_NOIO);
424662306a36Sopenharmony_ci	if (!p_uuid)
424762306a36Sopenharmony_ci		return false;
424862306a36Sopenharmony_ci
424962306a36Sopenharmony_ci	for (i = UI_CURRENT; i < UI_EXTENDED_SIZE; i++)
425062306a36Sopenharmony_ci		p_uuid[i] = be64_to_cpu(p->uuid[i]);
425162306a36Sopenharmony_ci
425262306a36Sopenharmony_ci	kfree(device->p_uuid);
425362306a36Sopenharmony_ci	device->p_uuid = p_uuid;
425462306a36Sopenharmony_ci
425562306a36Sopenharmony_ci	if ((device->state.conn < C_CONNECTED || device->state.pdsk == D_DISKLESS) &&
425662306a36Sopenharmony_ci	    device->state.disk < D_INCONSISTENT &&
425762306a36Sopenharmony_ci	    device->state.role == R_PRIMARY &&
425862306a36Sopenharmony_ci	    (device->ed_uuid & ~((u64)1)) != (p_uuid[UI_CURRENT] & ~((u64)1))) {
425962306a36Sopenharmony_ci		drbd_err(device, "Can only connect to data with current UUID=%016llX\n",
426062306a36Sopenharmony_ci		    (unsigned long long)device->ed_uuid);
426162306a36Sopenharmony_ci		conn_request_state(peer_device->connection, NS(conn, C_DISCONNECTING), CS_HARD);
426262306a36Sopenharmony_ci		return -EIO;
426362306a36Sopenharmony_ci	}
426462306a36Sopenharmony_ci
426562306a36Sopenharmony_ci	if (get_ldev(device)) {
426662306a36Sopenharmony_ci		int skip_initial_sync =
426762306a36Sopenharmony_ci			device->state.conn == C_CONNECTED &&
426862306a36Sopenharmony_ci			peer_device->connection->agreed_pro_version >= 90 &&
426962306a36Sopenharmony_ci			device->ldev->md.uuid[UI_CURRENT] == UUID_JUST_CREATED &&
427062306a36Sopenharmony_ci			(p_uuid[UI_FLAGS] & 8);
427162306a36Sopenharmony_ci		if (skip_initial_sync) {
427262306a36Sopenharmony_ci			drbd_info(device, "Accepted new current UUID, preparing to skip initial sync\n");
427362306a36Sopenharmony_ci			drbd_bitmap_io(device, &drbd_bmio_clear_n_write,
427462306a36Sopenharmony_ci					"clear_n_write from receive_uuids",
427562306a36Sopenharmony_ci					BM_LOCKED_TEST_ALLOWED, NULL);
427662306a36Sopenharmony_ci			_drbd_uuid_set(device, UI_CURRENT, p_uuid[UI_CURRENT]);
427762306a36Sopenharmony_ci			_drbd_uuid_set(device, UI_BITMAP, 0);
427862306a36Sopenharmony_ci			_drbd_set_state(_NS2(device, disk, D_UP_TO_DATE, pdsk, D_UP_TO_DATE),
427962306a36Sopenharmony_ci					CS_VERBOSE, NULL);
428062306a36Sopenharmony_ci			drbd_md_sync(device);
428162306a36Sopenharmony_ci			updated_uuids = 1;
428262306a36Sopenharmony_ci		}
428362306a36Sopenharmony_ci		put_ldev(device);
428462306a36Sopenharmony_ci	} else if (device->state.disk < D_INCONSISTENT &&
428562306a36Sopenharmony_ci		   device->state.role == R_PRIMARY) {
428662306a36Sopenharmony_ci		/* I am a diskless primary, the peer just created a new current UUID
428762306a36Sopenharmony_ci		   for me. */
428862306a36Sopenharmony_ci		updated_uuids = drbd_set_ed_uuid(device, p_uuid[UI_CURRENT]);
428962306a36Sopenharmony_ci	}
429062306a36Sopenharmony_ci
429162306a36Sopenharmony_ci	/* Before we test for the disk state, we should wait until an eventually
429262306a36Sopenharmony_ci	   ongoing cluster wide state change is finished. That is important if
429362306a36Sopenharmony_ci	   we are primary and are detaching from our disk. We need to see the
429462306a36Sopenharmony_ci	   new disk state... */
429562306a36Sopenharmony_ci	mutex_lock(device->state_mutex);
429662306a36Sopenharmony_ci	mutex_unlock(device->state_mutex);
429762306a36Sopenharmony_ci	if (device->state.conn >= C_CONNECTED && device->state.disk < D_INCONSISTENT)
429862306a36Sopenharmony_ci		updated_uuids |= drbd_set_ed_uuid(device, p_uuid[UI_CURRENT]);
429962306a36Sopenharmony_ci
430062306a36Sopenharmony_ci	if (updated_uuids)
430162306a36Sopenharmony_ci		drbd_print_uuids(device, "receiver updated UUIDs to");
430262306a36Sopenharmony_ci
430362306a36Sopenharmony_ci	return 0;
430462306a36Sopenharmony_ci}
430562306a36Sopenharmony_ci
430662306a36Sopenharmony_ci/**
430762306a36Sopenharmony_ci * convert_state() - Converts the peer's view of the cluster state to our point of view
430862306a36Sopenharmony_ci * @ps:		The state as seen by the peer.
430962306a36Sopenharmony_ci */
431062306a36Sopenharmony_cistatic union drbd_state convert_state(union drbd_state ps)
431162306a36Sopenharmony_ci{
431262306a36Sopenharmony_ci	union drbd_state ms;
431362306a36Sopenharmony_ci
431462306a36Sopenharmony_ci	static enum drbd_conns c_tab[] = {
431562306a36Sopenharmony_ci		[C_WF_REPORT_PARAMS] = C_WF_REPORT_PARAMS,
431662306a36Sopenharmony_ci		[C_CONNECTED] = C_CONNECTED,
431762306a36Sopenharmony_ci
431862306a36Sopenharmony_ci		[C_STARTING_SYNC_S] = C_STARTING_SYNC_T,
431962306a36Sopenharmony_ci		[C_STARTING_SYNC_T] = C_STARTING_SYNC_S,
432062306a36Sopenharmony_ci		[C_DISCONNECTING] = C_TEAR_DOWN, /* C_NETWORK_FAILURE, */
432162306a36Sopenharmony_ci		[C_VERIFY_S]       = C_VERIFY_T,
432262306a36Sopenharmony_ci		[C_MASK]   = C_MASK,
432362306a36Sopenharmony_ci	};
432462306a36Sopenharmony_ci
432562306a36Sopenharmony_ci	ms.i = ps.i;
432662306a36Sopenharmony_ci
432762306a36Sopenharmony_ci	ms.conn = c_tab[ps.conn];
432862306a36Sopenharmony_ci	ms.peer = ps.role;
432962306a36Sopenharmony_ci	ms.role = ps.peer;
433062306a36Sopenharmony_ci	ms.pdsk = ps.disk;
433162306a36Sopenharmony_ci	ms.disk = ps.pdsk;
433262306a36Sopenharmony_ci	ms.peer_isp = (ps.aftr_isp | ps.user_isp);
433362306a36Sopenharmony_ci
433462306a36Sopenharmony_ci	return ms;
433562306a36Sopenharmony_ci}
433662306a36Sopenharmony_ci
433762306a36Sopenharmony_cistatic int receive_req_state(struct drbd_connection *connection, struct packet_info *pi)
433862306a36Sopenharmony_ci{
433962306a36Sopenharmony_ci	struct drbd_peer_device *peer_device;
434062306a36Sopenharmony_ci	struct drbd_device *device;
434162306a36Sopenharmony_ci	struct p_req_state *p = pi->data;
434262306a36Sopenharmony_ci	union drbd_state mask, val;
434362306a36Sopenharmony_ci	enum drbd_state_rv rv;
434462306a36Sopenharmony_ci
434562306a36Sopenharmony_ci	peer_device = conn_peer_device(connection, pi->vnr);
434662306a36Sopenharmony_ci	if (!peer_device)
434762306a36Sopenharmony_ci		return -EIO;
434862306a36Sopenharmony_ci	device = peer_device->device;
434962306a36Sopenharmony_ci
435062306a36Sopenharmony_ci	mask.i = be32_to_cpu(p->mask);
435162306a36Sopenharmony_ci	val.i = be32_to_cpu(p->val);
435262306a36Sopenharmony_ci
435362306a36Sopenharmony_ci	if (test_bit(RESOLVE_CONFLICTS, &peer_device->connection->flags) &&
435462306a36Sopenharmony_ci	    mutex_is_locked(device->state_mutex)) {
435562306a36Sopenharmony_ci		drbd_send_sr_reply(peer_device, SS_CONCURRENT_ST_CHG);
435662306a36Sopenharmony_ci		return 0;
435762306a36Sopenharmony_ci	}
435862306a36Sopenharmony_ci
435962306a36Sopenharmony_ci	mask = convert_state(mask);
436062306a36Sopenharmony_ci	val = convert_state(val);
436162306a36Sopenharmony_ci
436262306a36Sopenharmony_ci	rv = drbd_change_state(device, CS_VERBOSE, mask, val);
436362306a36Sopenharmony_ci	drbd_send_sr_reply(peer_device, rv);
436462306a36Sopenharmony_ci
436562306a36Sopenharmony_ci	drbd_md_sync(device);
436662306a36Sopenharmony_ci
436762306a36Sopenharmony_ci	return 0;
436862306a36Sopenharmony_ci}
436962306a36Sopenharmony_ci
437062306a36Sopenharmony_cistatic int receive_req_conn_state(struct drbd_connection *connection, struct packet_info *pi)
437162306a36Sopenharmony_ci{
437262306a36Sopenharmony_ci	struct p_req_state *p = pi->data;
437362306a36Sopenharmony_ci	union drbd_state mask, val;
437462306a36Sopenharmony_ci	enum drbd_state_rv rv;
437562306a36Sopenharmony_ci
437662306a36Sopenharmony_ci	mask.i = be32_to_cpu(p->mask);
437762306a36Sopenharmony_ci	val.i = be32_to_cpu(p->val);
437862306a36Sopenharmony_ci
437962306a36Sopenharmony_ci	if (test_bit(RESOLVE_CONFLICTS, &connection->flags) &&
438062306a36Sopenharmony_ci	    mutex_is_locked(&connection->cstate_mutex)) {
438162306a36Sopenharmony_ci		conn_send_sr_reply(connection, SS_CONCURRENT_ST_CHG);
438262306a36Sopenharmony_ci		return 0;
438362306a36Sopenharmony_ci	}
438462306a36Sopenharmony_ci
438562306a36Sopenharmony_ci	mask = convert_state(mask);
438662306a36Sopenharmony_ci	val = convert_state(val);
438762306a36Sopenharmony_ci
438862306a36Sopenharmony_ci	rv = conn_request_state(connection, mask, val, CS_VERBOSE | CS_LOCAL_ONLY | CS_IGN_OUTD_FAIL);
438962306a36Sopenharmony_ci	conn_send_sr_reply(connection, rv);
439062306a36Sopenharmony_ci
439162306a36Sopenharmony_ci	return 0;
439262306a36Sopenharmony_ci}
439362306a36Sopenharmony_ci
439462306a36Sopenharmony_cistatic int receive_state(struct drbd_connection *connection, struct packet_info *pi)
439562306a36Sopenharmony_ci{
439662306a36Sopenharmony_ci	struct drbd_peer_device *peer_device;
439762306a36Sopenharmony_ci	struct drbd_device *device;
439862306a36Sopenharmony_ci	struct p_state *p = pi->data;
439962306a36Sopenharmony_ci	union drbd_state os, ns, peer_state;
440062306a36Sopenharmony_ci	enum drbd_disk_state real_peer_disk;
440162306a36Sopenharmony_ci	enum chg_state_flags cs_flags;
440262306a36Sopenharmony_ci	int rv;
440362306a36Sopenharmony_ci
440462306a36Sopenharmony_ci	peer_device = conn_peer_device(connection, pi->vnr);
440562306a36Sopenharmony_ci	if (!peer_device)
440662306a36Sopenharmony_ci		return config_unknown_volume(connection, pi);
440762306a36Sopenharmony_ci	device = peer_device->device;
440862306a36Sopenharmony_ci
440962306a36Sopenharmony_ci	peer_state.i = be32_to_cpu(p->state);
441062306a36Sopenharmony_ci
441162306a36Sopenharmony_ci	real_peer_disk = peer_state.disk;
441262306a36Sopenharmony_ci	if (peer_state.disk == D_NEGOTIATING) {
441362306a36Sopenharmony_ci		real_peer_disk = device->p_uuid[UI_FLAGS] & 4 ? D_INCONSISTENT : D_CONSISTENT;
441462306a36Sopenharmony_ci		drbd_info(device, "real peer disk state = %s\n", drbd_disk_str(real_peer_disk));
441562306a36Sopenharmony_ci	}
441662306a36Sopenharmony_ci
441762306a36Sopenharmony_ci	spin_lock_irq(&device->resource->req_lock);
441862306a36Sopenharmony_ci retry:
441962306a36Sopenharmony_ci	os = ns = drbd_read_state(device);
442062306a36Sopenharmony_ci	spin_unlock_irq(&device->resource->req_lock);
442162306a36Sopenharmony_ci
442262306a36Sopenharmony_ci	/* If some other part of the code (ack_receiver thread, timeout)
442362306a36Sopenharmony_ci	 * already decided to close the connection again,
442462306a36Sopenharmony_ci	 * we must not "re-establish" it here. */
442562306a36Sopenharmony_ci	if (os.conn <= C_TEAR_DOWN)
442662306a36Sopenharmony_ci		return -ECONNRESET;
442762306a36Sopenharmony_ci
442862306a36Sopenharmony_ci	/* If this is the "end of sync" confirmation, usually the peer disk
442962306a36Sopenharmony_ci	 * transitions from D_INCONSISTENT to D_UP_TO_DATE. For empty (0 bits
443062306a36Sopenharmony_ci	 * set) resync started in PausedSyncT, or if the timing of pause-/
443162306a36Sopenharmony_ci	 * unpause-sync events has been "just right", the peer disk may
443262306a36Sopenharmony_ci	 * transition from D_CONSISTENT to D_UP_TO_DATE as well.
443362306a36Sopenharmony_ci	 */
443462306a36Sopenharmony_ci	if ((os.pdsk == D_INCONSISTENT || os.pdsk == D_CONSISTENT) &&
443562306a36Sopenharmony_ci	    real_peer_disk == D_UP_TO_DATE &&
443662306a36Sopenharmony_ci	    os.conn > C_CONNECTED && os.disk == D_UP_TO_DATE) {
443762306a36Sopenharmony_ci		/* If we are (becoming) SyncSource, but peer is still in sync
443862306a36Sopenharmony_ci		 * preparation, ignore its uptodate-ness to avoid flapping, it
443962306a36Sopenharmony_ci		 * will change to inconsistent once the peer reaches active
444062306a36Sopenharmony_ci		 * syncing states.
444162306a36Sopenharmony_ci		 * It may have changed syncer-paused flags, however, so we
444262306a36Sopenharmony_ci		 * cannot ignore this completely. */
444362306a36Sopenharmony_ci		if (peer_state.conn > C_CONNECTED &&
444462306a36Sopenharmony_ci		    peer_state.conn < C_SYNC_SOURCE)
444562306a36Sopenharmony_ci			real_peer_disk = D_INCONSISTENT;
444662306a36Sopenharmony_ci
444762306a36Sopenharmony_ci		/* if peer_state changes to connected at the same time,
444862306a36Sopenharmony_ci		 * it explicitly notifies us that it finished resync.
444962306a36Sopenharmony_ci		 * Maybe we should finish it up, too? */
445062306a36Sopenharmony_ci		else if (os.conn >= C_SYNC_SOURCE &&
445162306a36Sopenharmony_ci			 peer_state.conn == C_CONNECTED) {
445262306a36Sopenharmony_ci			if (drbd_bm_total_weight(device) <= device->rs_failed)
445362306a36Sopenharmony_ci				drbd_resync_finished(peer_device);
445462306a36Sopenharmony_ci			return 0;
445562306a36Sopenharmony_ci		}
445662306a36Sopenharmony_ci	}
445762306a36Sopenharmony_ci
445862306a36Sopenharmony_ci	/* explicit verify finished notification, stop sector reached. */
445962306a36Sopenharmony_ci	if (os.conn == C_VERIFY_T && os.disk == D_UP_TO_DATE &&
446062306a36Sopenharmony_ci	    peer_state.conn == C_CONNECTED && real_peer_disk == D_UP_TO_DATE) {
446162306a36Sopenharmony_ci		ov_out_of_sync_print(peer_device);
446262306a36Sopenharmony_ci		drbd_resync_finished(peer_device);
446362306a36Sopenharmony_ci		return 0;
446462306a36Sopenharmony_ci	}
446562306a36Sopenharmony_ci
446662306a36Sopenharmony_ci	/* peer says his disk is inconsistent, while we think it is uptodate,
446762306a36Sopenharmony_ci	 * and this happens while the peer still thinks we have a sync going on,
446862306a36Sopenharmony_ci	 * but we think we are already done with the sync.
446962306a36Sopenharmony_ci	 * We ignore this to avoid flapping pdsk.
447062306a36Sopenharmony_ci	 * This should not happen, if the peer is a recent version of drbd. */
447162306a36Sopenharmony_ci	if (os.pdsk == D_UP_TO_DATE && real_peer_disk == D_INCONSISTENT &&
447262306a36Sopenharmony_ci	    os.conn == C_CONNECTED && peer_state.conn > C_SYNC_SOURCE)
447362306a36Sopenharmony_ci		real_peer_disk = D_UP_TO_DATE;
447462306a36Sopenharmony_ci
447562306a36Sopenharmony_ci	if (ns.conn == C_WF_REPORT_PARAMS)
447662306a36Sopenharmony_ci		ns.conn = C_CONNECTED;
447762306a36Sopenharmony_ci
447862306a36Sopenharmony_ci	if (peer_state.conn == C_AHEAD)
447962306a36Sopenharmony_ci		ns.conn = C_BEHIND;
448062306a36Sopenharmony_ci
448162306a36Sopenharmony_ci	/* TODO:
448262306a36Sopenharmony_ci	 * if (primary and diskless and peer uuid != effective uuid)
448362306a36Sopenharmony_ci	 *     abort attach on peer;
448462306a36Sopenharmony_ci	 *
448562306a36Sopenharmony_ci	 * If this node does not have good data, was already connected, but
448662306a36Sopenharmony_ci	 * the peer did a late attach only now, trying to "negotiate" with me,
448762306a36Sopenharmony_ci	 * AND I am currently Primary, possibly frozen, with some specific
448862306a36Sopenharmony_ci	 * "effective" uuid, this should never be reached, really, because
448962306a36Sopenharmony_ci	 * we first send the uuids, then the current state.
449062306a36Sopenharmony_ci	 *
449162306a36Sopenharmony_ci	 * In this scenario, we already dropped the connection hard
449262306a36Sopenharmony_ci	 * when we received the unsuitable uuids (receive_uuids().
449362306a36Sopenharmony_ci	 *
449462306a36Sopenharmony_ci	 * Should we want to change this, that is: not drop the connection in
449562306a36Sopenharmony_ci	 * receive_uuids() already, then we would need to add a branch here
449662306a36Sopenharmony_ci	 * that aborts the attach of "unsuitable uuids" on the peer in case
449762306a36Sopenharmony_ci	 * this node is currently Diskless Primary.
449862306a36Sopenharmony_ci	 */
449962306a36Sopenharmony_ci
450062306a36Sopenharmony_ci	if (device->p_uuid && peer_state.disk >= D_NEGOTIATING &&
450162306a36Sopenharmony_ci	    get_ldev_if_state(device, D_NEGOTIATING)) {
450262306a36Sopenharmony_ci		int cr; /* consider resync */
450362306a36Sopenharmony_ci
450462306a36Sopenharmony_ci		/* if we established a new connection */
450562306a36Sopenharmony_ci		cr  = (os.conn < C_CONNECTED);
450662306a36Sopenharmony_ci		/* if we had an established connection
450762306a36Sopenharmony_ci		 * and one of the nodes newly attaches a disk */
450862306a36Sopenharmony_ci		cr |= (os.conn == C_CONNECTED &&
450962306a36Sopenharmony_ci		       (peer_state.disk == D_NEGOTIATING ||
451062306a36Sopenharmony_ci			os.disk == D_NEGOTIATING));
451162306a36Sopenharmony_ci		/* if we have both been inconsistent, and the peer has been
451262306a36Sopenharmony_ci		 * forced to be UpToDate with --force */
451362306a36Sopenharmony_ci		cr |= test_bit(CONSIDER_RESYNC, &device->flags);
451462306a36Sopenharmony_ci		/* if we had been plain connected, and the admin requested to
451562306a36Sopenharmony_ci		 * start a sync by "invalidate" or "invalidate-remote" */
451662306a36Sopenharmony_ci		cr |= (os.conn == C_CONNECTED &&
451762306a36Sopenharmony_ci				(peer_state.conn >= C_STARTING_SYNC_S &&
451862306a36Sopenharmony_ci				 peer_state.conn <= C_WF_BITMAP_T));
451962306a36Sopenharmony_ci
452062306a36Sopenharmony_ci		if (cr)
452162306a36Sopenharmony_ci			ns.conn = drbd_sync_handshake(peer_device, peer_state.role, real_peer_disk);
452262306a36Sopenharmony_ci
452362306a36Sopenharmony_ci		put_ldev(device);
452462306a36Sopenharmony_ci		if (ns.conn == C_MASK) {
452562306a36Sopenharmony_ci			ns.conn = C_CONNECTED;
452662306a36Sopenharmony_ci			if (device->state.disk == D_NEGOTIATING) {
452762306a36Sopenharmony_ci				drbd_force_state(device, NS(disk, D_FAILED));
452862306a36Sopenharmony_ci			} else if (peer_state.disk == D_NEGOTIATING) {
452962306a36Sopenharmony_ci				drbd_err(device, "Disk attach process on the peer node was aborted.\n");
453062306a36Sopenharmony_ci				peer_state.disk = D_DISKLESS;
453162306a36Sopenharmony_ci				real_peer_disk = D_DISKLESS;
453262306a36Sopenharmony_ci			} else {
453362306a36Sopenharmony_ci				if (test_and_clear_bit(CONN_DRY_RUN, &peer_device->connection->flags))
453462306a36Sopenharmony_ci					return -EIO;
453562306a36Sopenharmony_ci				D_ASSERT(device, os.conn == C_WF_REPORT_PARAMS);
453662306a36Sopenharmony_ci				conn_request_state(peer_device->connection, NS(conn, C_DISCONNECTING), CS_HARD);
453762306a36Sopenharmony_ci				return -EIO;
453862306a36Sopenharmony_ci			}
453962306a36Sopenharmony_ci		}
454062306a36Sopenharmony_ci	}
454162306a36Sopenharmony_ci
454262306a36Sopenharmony_ci	spin_lock_irq(&device->resource->req_lock);
454362306a36Sopenharmony_ci	if (os.i != drbd_read_state(device).i)
454462306a36Sopenharmony_ci		goto retry;
454562306a36Sopenharmony_ci	clear_bit(CONSIDER_RESYNC, &device->flags);
454662306a36Sopenharmony_ci	ns.peer = peer_state.role;
454762306a36Sopenharmony_ci	ns.pdsk = real_peer_disk;
454862306a36Sopenharmony_ci	ns.peer_isp = (peer_state.aftr_isp | peer_state.user_isp);
454962306a36Sopenharmony_ci	if ((ns.conn == C_CONNECTED || ns.conn == C_WF_BITMAP_S) && ns.disk == D_NEGOTIATING)
455062306a36Sopenharmony_ci		ns.disk = device->new_state_tmp.disk;
455162306a36Sopenharmony_ci	cs_flags = CS_VERBOSE + (os.conn < C_CONNECTED && ns.conn >= C_CONNECTED ? 0 : CS_HARD);
455262306a36Sopenharmony_ci	if (ns.pdsk == D_CONSISTENT && drbd_suspended(device) && ns.conn == C_CONNECTED && os.conn < C_CONNECTED &&
455362306a36Sopenharmony_ci	    test_bit(NEW_CUR_UUID, &device->flags)) {
455462306a36Sopenharmony_ci		/* Do not allow tl_restart(RESEND) for a rebooted peer. We can only allow this
455562306a36Sopenharmony_ci		   for temporal network outages! */
455662306a36Sopenharmony_ci		spin_unlock_irq(&device->resource->req_lock);
455762306a36Sopenharmony_ci		drbd_err(device, "Aborting Connect, can not thaw IO with an only Consistent peer\n");
455862306a36Sopenharmony_ci		tl_clear(peer_device->connection);
455962306a36Sopenharmony_ci		drbd_uuid_new_current(device);
456062306a36Sopenharmony_ci		clear_bit(NEW_CUR_UUID, &device->flags);
456162306a36Sopenharmony_ci		conn_request_state(peer_device->connection, NS2(conn, C_PROTOCOL_ERROR, susp, 0), CS_HARD);
456262306a36Sopenharmony_ci		return -EIO;
456362306a36Sopenharmony_ci	}
456462306a36Sopenharmony_ci	rv = _drbd_set_state(device, ns, cs_flags, NULL);
456562306a36Sopenharmony_ci	ns = drbd_read_state(device);
456662306a36Sopenharmony_ci	spin_unlock_irq(&device->resource->req_lock);
456762306a36Sopenharmony_ci
456862306a36Sopenharmony_ci	if (rv < SS_SUCCESS) {
456962306a36Sopenharmony_ci		conn_request_state(peer_device->connection, NS(conn, C_DISCONNECTING), CS_HARD);
457062306a36Sopenharmony_ci		return -EIO;
457162306a36Sopenharmony_ci	}
457262306a36Sopenharmony_ci
457362306a36Sopenharmony_ci	if (os.conn > C_WF_REPORT_PARAMS) {
457462306a36Sopenharmony_ci		if (ns.conn > C_CONNECTED && peer_state.conn <= C_CONNECTED &&
457562306a36Sopenharmony_ci		    peer_state.disk != D_NEGOTIATING ) {
457662306a36Sopenharmony_ci			/* we want resync, peer has not yet decided to sync... */
457762306a36Sopenharmony_ci			/* Nowadays only used when forcing a node into primary role and
457862306a36Sopenharmony_ci			   setting its disk to UpToDate with that */
457962306a36Sopenharmony_ci			drbd_send_uuids(peer_device);
458062306a36Sopenharmony_ci			drbd_send_current_state(peer_device);
458162306a36Sopenharmony_ci		}
458262306a36Sopenharmony_ci	}
458362306a36Sopenharmony_ci
458462306a36Sopenharmony_ci	clear_bit(DISCARD_MY_DATA, &device->flags);
458562306a36Sopenharmony_ci
458662306a36Sopenharmony_ci	drbd_md_sync(device); /* update connected indicator, la_size_sect, ... */
458762306a36Sopenharmony_ci
458862306a36Sopenharmony_ci	return 0;
458962306a36Sopenharmony_ci}
459062306a36Sopenharmony_ci
459162306a36Sopenharmony_cistatic int receive_sync_uuid(struct drbd_connection *connection, struct packet_info *pi)
459262306a36Sopenharmony_ci{
459362306a36Sopenharmony_ci	struct drbd_peer_device *peer_device;
459462306a36Sopenharmony_ci	struct drbd_device *device;
459562306a36Sopenharmony_ci	struct p_rs_uuid *p = pi->data;
459662306a36Sopenharmony_ci
459762306a36Sopenharmony_ci	peer_device = conn_peer_device(connection, pi->vnr);
459862306a36Sopenharmony_ci	if (!peer_device)
459962306a36Sopenharmony_ci		return -EIO;
460062306a36Sopenharmony_ci	device = peer_device->device;
460162306a36Sopenharmony_ci
460262306a36Sopenharmony_ci	wait_event(device->misc_wait,
460362306a36Sopenharmony_ci		   device->state.conn == C_WF_SYNC_UUID ||
460462306a36Sopenharmony_ci		   device->state.conn == C_BEHIND ||
460562306a36Sopenharmony_ci		   device->state.conn < C_CONNECTED ||
460662306a36Sopenharmony_ci		   device->state.disk < D_NEGOTIATING);
460762306a36Sopenharmony_ci
460862306a36Sopenharmony_ci	/* D_ASSERT(device,  device->state.conn == C_WF_SYNC_UUID ); */
460962306a36Sopenharmony_ci
461062306a36Sopenharmony_ci	/* Here the _drbd_uuid_ functions are right, current should
461162306a36Sopenharmony_ci	   _not_ be rotated into the history */
461262306a36Sopenharmony_ci	if (get_ldev_if_state(device, D_NEGOTIATING)) {
461362306a36Sopenharmony_ci		_drbd_uuid_set(device, UI_CURRENT, be64_to_cpu(p->uuid));
461462306a36Sopenharmony_ci		_drbd_uuid_set(device, UI_BITMAP, 0UL);
461562306a36Sopenharmony_ci
461662306a36Sopenharmony_ci		drbd_print_uuids(device, "updated sync uuid");
461762306a36Sopenharmony_ci		drbd_start_resync(device, C_SYNC_TARGET);
461862306a36Sopenharmony_ci
461962306a36Sopenharmony_ci		put_ldev(device);
462062306a36Sopenharmony_ci	} else
462162306a36Sopenharmony_ci		drbd_err(device, "Ignoring SyncUUID packet!\n");
462262306a36Sopenharmony_ci
462362306a36Sopenharmony_ci	return 0;
462462306a36Sopenharmony_ci}
462562306a36Sopenharmony_ci
462662306a36Sopenharmony_ci/*
462762306a36Sopenharmony_ci * receive_bitmap_plain
462862306a36Sopenharmony_ci *
462962306a36Sopenharmony_ci * Return 0 when done, 1 when another iteration is needed, and a negative error
463062306a36Sopenharmony_ci * code upon failure.
463162306a36Sopenharmony_ci */
463262306a36Sopenharmony_cistatic int
463362306a36Sopenharmony_cireceive_bitmap_plain(struct drbd_peer_device *peer_device, unsigned int size,
463462306a36Sopenharmony_ci		     unsigned long *p, struct bm_xfer_ctx *c)
463562306a36Sopenharmony_ci{
463662306a36Sopenharmony_ci	unsigned int data_size = DRBD_SOCKET_BUFFER_SIZE -
463762306a36Sopenharmony_ci				 drbd_header_size(peer_device->connection);
463862306a36Sopenharmony_ci	unsigned int num_words = min_t(size_t, data_size / sizeof(*p),
463962306a36Sopenharmony_ci				       c->bm_words - c->word_offset);
464062306a36Sopenharmony_ci	unsigned int want = num_words * sizeof(*p);
464162306a36Sopenharmony_ci	int err;
464262306a36Sopenharmony_ci
464362306a36Sopenharmony_ci	if (want != size) {
464462306a36Sopenharmony_ci		drbd_err(peer_device, "%s:want (%u) != size (%u)\n", __func__, want, size);
464562306a36Sopenharmony_ci		return -EIO;
464662306a36Sopenharmony_ci	}
464762306a36Sopenharmony_ci	if (want == 0)
464862306a36Sopenharmony_ci		return 0;
464962306a36Sopenharmony_ci	err = drbd_recv_all(peer_device->connection, p, want);
465062306a36Sopenharmony_ci	if (err)
465162306a36Sopenharmony_ci		return err;
465262306a36Sopenharmony_ci
465362306a36Sopenharmony_ci	drbd_bm_merge_lel(peer_device->device, c->word_offset, num_words, p);
465462306a36Sopenharmony_ci
465562306a36Sopenharmony_ci	c->word_offset += num_words;
465662306a36Sopenharmony_ci	c->bit_offset = c->word_offset * BITS_PER_LONG;
465762306a36Sopenharmony_ci	if (c->bit_offset > c->bm_bits)
465862306a36Sopenharmony_ci		c->bit_offset = c->bm_bits;
465962306a36Sopenharmony_ci
466062306a36Sopenharmony_ci	return 1;
466162306a36Sopenharmony_ci}
466262306a36Sopenharmony_ci
466362306a36Sopenharmony_cistatic enum drbd_bitmap_code dcbp_get_code(struct p_compressed_bm *p)
466462306a36Sopenharmony_ci{
466562306a36Sopenharmony_ci	return (enum drbd_bitmap_code)(p->encoding & 0x0f);
466662306a36Sopenharmony_ci}
466762306a36Sopenharmony_ci
466862306a36Sopenharmony_cistatic int dcbp_get_start(struct p_compressed_bm *p)
466962306a36Sopenharmony_ci{
467062306a36Sopenharmony_ci	return (p->encoding & 0x80) != 0;
467162306a36Sopenharmony_ci}
467262306a36Sopenharmony_ci
467362306a36Sopenharmony_cistatic int dcbp_get_pad_bits(struct p_compressed_bm *p)
467462306a36Sopenharmony_ci{
467562306a36Sopenharmony_ci	return (p->encoding >> 4) & 0x7;
467662306a36Sopenharmony_ci}
467762306a36Sopenharmony_ci
467862306a36Sopenharmony_ci/*
467962306a36Sopenharmony_ci * recv_bm_rle_bits
468062306a36Sopenharmony_ci *
468162306a36Sopenharmony_ci * Return 0 when done, 1 when another iteration is needed, and a negative error
468262306a36Sopenharmony_ci * code upon failure.
468362306a36Sopenharmony_ci */
468462306a36Sopenharmony_cistatic int
468562306a36Sopenharmony_cirecv_bm_rle_bits(struct drbd_peer_device *peer_device,
468662306a36Sopenharmony_ci		struct p_compressed_bm *p,
468762306a36Sopenharmony_ci		 struct bm_xfer_ctx *c,
468862306a36Sopenharmony_ci		 unsigned int len)
468962306a36Sopenharmony_ci{
469062306a36Sopenharmony_ci	struct bitstream bs;
469162306a36Sopenharmony_ci	u64 look_ahead;
469262306a36Sopenharmony_ci	u64 rl;
469362306a36Sopenharmony_ci	u64 tmp;
469462306a36Sopenharmony_ci	unsigned long s = c->bit_offset;
469562306a36Sopenharmony_ci	unsigned long e;
469662306a36Sopenharmony_ci	int toggle = dcbp_get_start(p);
469762306a36Sopenharmony_ci	int have;
469862306a36Sopenharmony_ci	int bits;
469962306a36Sopenharmony_ci
470062306a36Sopenharmony_ci	bitstream_init(&bs, p->code, len, dcbp_get_pad_bits(p));
470162306a36Sopenharmony_ci
470262306a36Sopenharmony_ci	bits = bitstream_get_bits(&bs, &look_ahead, 64);
470362306a36Sopenharmony_ci	if (bits < 0)
470462306a36Sopenharmony_ci		return -EIO;
470562306a36Sopenharmony_ci
470662306a36Sopenharmony_ci	for (have = bits; have > 0; s += rl, toggle = !toggle) {
470762306a36Sopenharmony_ci		bits = vli_decode_bits(&rl, look_ahead);
470862306a36Sopenharmony_ci		if (bits <= 0)
470962306a36Sopenharmony_ci			return -EIO;
471062306a36Sopenharmony_ci
471162306a36Sopenharmony_ci		if (toggle) {
471262306a36Sopenharmony_ci			e = s + rl -1;
471362306a36Sopenharmony_ci			if (e >= c->bm_bits) {
471462306a36Sopenharmony_ci				drbd_err(peer_device, "bitmap overflow (e:%lu) while decoding bm RLE packet\n", e);
471562306a36Sopenharmony_ci				return -EIO;
471662306a36Sopenharmony_ci			}
471762306a36Sopenharmony_ci			_drbd_bm_set_bits(peer_device->device, s, e);
471862306a36Sopenharmony_ci		}
471962306a36Sopenharmony_ci
472062306a36Sopenharmony_ci		if (have < bits) {
472162306a36Sopenharmony_ci			drbd_err(peer_device, "bitmap decoding error: h:%d b:%d la:0x%08llx l:%u/%u\n",
472262306a36Sopenharmony_ci				have, bits, look_ahead,
472362306a36Sopenharmony_ci				(unsigned int)(bs.cur.b - p->code),
472462306a36Sopenharmony_ci				(unsigned int)bs.buf_len);
472562306a36Sopenharmony_ci			return -EIO;
472662306a36Sopenharmony_ci		}
472762306a36Sopenharmony_ci		/* if we consumed all 64 bits, assign 0; >> 64 is "undefined"; */
472862306a36Sopenharmony_ci		if (likely(bits < 64))
472962306a36Sopenharmony_ci			look_ahead >>= bits;
473062306a36Sopenharmony_ci		else
473162306a36Sopenharmony_ci			look_ahead = 0;
473262306a36Sopenharmony_ci		have -= bits;
473362306a36Sopenharmony_ci
473462306a36Sopenharmony_ci		bits = bitstream_get_bits(&bs, &tmp, 64 - have);
473562306a36Sopenharmony_ci		if (bits < 0)
473662306a36Sopenharmony_ci			return -EIO;
473762306a36Sopenharmony_ci		look_ahead |= tmp << have;
473862306a36Sopenharmony_ci		have += bits;
473962306a36Sopenharmony_ci	}
474062306a36Sopenharmony_ci
474162306a36Sopenharmony_ci	c->bit_offset = s;
474262306a36Sopenharmony_ci	bm_xfer_ctx_bit_to_word_offset(c);
474362306a36Sopenharmony_ci
474462306a36Sopenharmony_ci	return (s != c->bm_bits);
474562306a36Sopenharmony_ci}
474662306a36Sopenharmony_ci
474762306a36Sopenharmony_ci/*
474862306a36Sopenharmony_ci * decode_bitmap_c
474962306a36Sopenharmony_ci *
475062306a36Sopenharmony_ci * Return 0 when done, 1 when another iteration is needed, and a negative error
475162306a36Sopenharmony_ci * code upon failure.
475262306a36Sopenharmony_ci */
475362306a36Sopenharmony_cistatic int
475462306a36Sopenharmony_cidecode_bitmap_c(struct drbd_peer_device *peer_device,
475562306a36Sopenharmony_ci		struct p_compressed_bm *p,
475662306a36Sopenharmony_ci		struct bm_xfer_ctx *c,
475762306a36Sopenharmony_ci		unsigned int len)
475862306a36Sopenharmony_ci{
475962306a36Sopenharmony_ci	if (dcbp_get_code(p) == RLE_VLI_Bits)
476062306a36Sopenharmony_ci		return recv_bm_rle_bits(peer_device, p, c, len - sizeof(*p));
476162306a36Sopenharmony_ci
476262306a36Sopenharmony_ci	/* other variants had been implemented for evaluation,
476362306a36Sopenharmony_ci	 * but have been dropped as this one turned out to be "best"
476462306a36Sopenharmony_ci	 * during all our tests. */
476562306a36Sopenharmony_ci
476662306a36Sopenharmony_ci	drbd_err(peer_device, "receive_bitmap_c: unknown encoding %u\n", p->encoding);
476762306a36Sopenharmony_ci	conn_request_state(peer_device->connection, NS(conn, C_PROTOCOL_ERROR), CS_HARD);
476862306a36Sopenharmony_ci	return -EIO;
476962306a36Sopenharmony_ci}
477062306a36Sopenharmony_ci
477162306a36Sopenharmony_civoid INFO_bm_xfer_stats(struct drbd_peer_device *peer_device,
477262306a36Sopenharmony_ci		const char *direction, struct bm_xfer_ctx *c)
477362306a36Sopenharmony_ci{
477462306a36Sopenharmony_ci	/* what would it take to transfer it "plaintext" */
477562306a36Sopenharmony_ci	unsigned int header_size = drbd_header_size(peer_device->connection);
477662306a36Sopenharmony_ci	unsigned int data_size = DRBD_SOCKET_BUFFER_SIZE - header_size;
477762306a36Sopenharmony_ci	unsigned int plain =
477862306a36Sopenharmony_ci		header_size * (DIV_ROUND_UP(c->bm_words, data_size) + 1) +
477962306a36Sopenharmony_ci		c->bm_words * sizeof(unsigned long);
478062306a36Sopenharmony_ci	unsigned int total = c->bytes[0] + c->bytes[1];
478162306a36Sopenharmony_ci	unsigned int r;
478262306a36Sopenharmony_ci
478362306a36Sopenharmony_ci	/* total can not be zero. but just in case: */
478462306a36Sopenharmony_ci	if (total == 0)
478562306a36Sopenharmony_ci		return;
478662306a36Sopenharmony_ci
478762306a36Sopenharmony_ci	/* don't report if not compressed */
478862306a36Sopenharmony_ci	if (total >= plain)
478962306a36Sopenharmony_ci		return;
479062306a36Sopenharmony_ci
479162306a36Sopenharmony_ci	/* total < plain. check for overflow, still */
479262306a36Sopenharmony_ci	r = (total > UINT_MAX/1000) ? (total / (plain/1000))
479362306a36Sopenharmony_ci		                    : (1000 * total / plain);
479462306a36Sopenharmony_ci
479562306a36Sopenharmony_ci	if (r > 1000)
479662306a36Sopenharmony_ci		r = 1000;
479762306a36Sopenharmony_ci
479862306a36Sopenharmony_ci	r = 1000 - r;
479962306a36Sopenharmony_ci	drbd_info(peer_device, "%s bitmap stats [Bytes(packets)]: plain %u(%u), RLE %u(%u), "
480062306a36Sopenharmony_ci	     "total %u; compression: %u.%u%%\n",
480162306a36Sopenharmony_ci			direction,
480262306a36Sopenharmony_ci			c->bytes[1], c->packets[1],
480362306a36Sopenharmony_ci			c->bytes[0], c->packets[0],
480462306a36Sopenharmony_ci			total, r/10, r % 10);
480562306a36Sopenharmony_ci}
480662306a36Sopenharmony_ci
480762306a36Sopenharmony_ci/* Since we are processing the bitfield from lower addresses to higher,
480862306a36Sopenharmony_ci   it does not matter if the process it in 32 bit chunks or 64 bit
480962306a36Sopenharmony_ci   chunks as long as it is little endian. (Understand it as byte stream,
481062306a36Sopenharmony_ci   beginning with the lowest byte...) If we would use big endian
481162306a36Sopenharmony_ci   we would need to process it from the highest address to the lowest,
481262306a36Sopenharmony_ci   in order to be agnostic to the 32 vs 64 bits issue.
481362306a36Sopenharmony_ci
481462306a36Sopenharmony_ci   returns 0 on failure, 1 if we successfully received it. */
481562306a36Sopenharmony_cistatic int receive_bitmap(struct drbd_connection *connection, struct packet_info *pi)
481662306a36Sopenharmony_ci{
481762306a36Sopenharmony_ci	struct drbd_peer_device *peer_device;
481862306a36Sopenharmony_ci	struct drbd_device *device;
481962306a36Sopenharmony_ci	struct bm_xfer_ctx c;
482062306a36Sopenharmony_ci	int err;
482162306a36Sopenharmony_ci
482262306a36Sopenharmony_ci	peer_device = conn_peer_device(connection, pi->vnr);
482362306a36Sopenharmony_ci	if (!peer_device)
482462306a36Sopenharmony_ci		return -EIO;
482562306a36Sopenharmony_ci	device = peer_device->device;
482662306a36Sopenharmony_ci
482762306a36Sopenharmony_ci	drbd_bm_lock(device, "receive bitmap", BM_LOCKED_SET_ALLOWED);
482862306a36Sopenharmony_ci	/* you are supposed to send additional out-of-sync information
482962306a36Sopenharmony_ci	 * if you actually set bits during this phase */
483062306a36Sopenharmony_ci
483162306a36Sopenharmony_ci	c = (struct bm_xfer_ctx) {
483262306a36Sopenharmony_ci		.bm_bits = drbd_bm_bits(device),
483362306a36Sopenharmony_ci		.bm_words = drbd_bm_words(device),
483462306a36Sopenharmony_ci	};
483562306a36Sopenharmony_ci
483662306a36Sopenharmony_ci	for(;;) {
483762306a36Sopenharmony_ci		if (pi->cmd == P_BITMAP)
483862306a36Sopenharmony_ci			err = receive_bitmap_plain(peer_device, pi->size, pi->data, &c);
483962306a36Sopenharmony_ci		else if (pi->cmd == P_COMPRESSED_BITMAP) {
484062306a36Sopenharmony_ci			/* MAYBE: sanity check that we speak proto >= 90,
484162306a36Sopenharmony_ci			 * and the feature is enabled! */
484262306a36Sopenharmony_ci			struct p_compressed_bm *p = pi->data;
484362306a36Sopenharmony_ci
484462306a36Sopenharmony_ci			if (pi->size > DRBD_SOCKET_BUFFER_SIZE - drbd_header_size(connection)) {
484562306a36Sopenharmony_ci				drbd_err(device, "ReportCBitmap packet too large\n");
484662306a36Sopenharmony_ci				err = -EIO;
484762306a36Sopenharmony_ci				goto out;
484862306a36Sopenharmony_ci			}
484962306a36Sopenharmony_ci			if (pi->size <= sizeof(*p)) {
485062306a36Sopenharmony_ci				drbd_err(device, "ReportCBitmap packet too small (l:%u)\n", pi->size);
485162306a36Sopenharmony_ci				err = -EIO;
485262306a36Sopenharmony_ci				goto out;
485362306a36Sopenharmony_ci			}
485462306a36Sopenharmony_ci			err = drbd_recv_all(peer_device->connection, p, pi->size);
485562306a36Sopenharmony_ci			if (err)
485662306a36Sopenharmony_ci			       goto out;
485762306a36Sopenharmony_ci			err = decode_bitmap_c(peer_device, p, &c, pi->size);
485862306a36Sopenharmony_ci		} else {
485962306a36Sopenharmony_ci			drbd_warn(device, "receive_bitmap: cmd neither ReportBitMap nor ReportCBitMap (is 0x%x)", pi->cmd);
486062306a36Sopenharmony_ci			err = -EIO;
486162306a36Sopenharmony_ci			goto out;
486262306a36Sopenharmony_ci		}
486362306a36Sopenharmony_ci
486462306a36Sopenharmony_ci		c.packets[pi->cmd == P_BITMAP]++;
486562306a36Sopenharmony_ci		c.bytes[pi->cmd == P_BITMAP] += drbd_header_size(connection) + pi->size;
486662306a36Sopenharmony_ci
486762306a36Sopenharmony_ci		if (err <= 0) {
486862306a36Sopenharmony_ci			if (err < 0)
486962306a36Sopenharmony_ci				goto out;
487062306a36Sopenharmony_ci			break;
487162306a36Sopenharmony_ci		}
487262306a36Sopenharmony_ci		err = drbd_recv_header(peer_device->connection, pi);
487362306a36Sopenharmony_ci		if (err)
487462306a36Sopenharmony_ci			goto out;
487562306a36Sopenharmony_ci	}
487662306a36Sopenharmony_ci
487762306a36Sopenharmony_ci	INFO_bm_xfer_stats(peer_device, "receive", &c);
487862306a36Sopenharmony_ci
487962306a36Sopenharmony_ci	if (device->state.conn == C_WF_BITMAP_T) {
488062306a36Sopenharmony_ci		enum drbd_state_rv rv;
488162306a36Sopenharmony_ci
488262306a36Sopenharmony_ci		err = drbd_send_bitmap(device, peer_device);
488362306a36Sopenharmony_ci		if (err)
488462306a36Sopenharmony_ci			goto out;
488562306a36Sopenharmony_ci		/* Omit CS_ORDERED with this state transition to avoid deadlocks. */
488662306a36Sopenharmony_ci		rv = _drbd_request_state(device, NS(conn, C_WF_SYNC_UUID), CS_VERBOSE);
488762306a36Sopenharmony_ci		D_ASSERT(device, rv == SS_SUCCESS);
488862306a36Sopenharmony_ci	} else if (device->state.conn != C_WF_BITMAP_S) {
488962306a36Sopenharmony_ci		/* admin may have requested C_DISCONNECTING,
489062306a36Sopenharmony_ci		 * other threads may have noticed network errors */
489162306a36Sopenharmony_ci		drbd_info(device, "unexpected cstate (%s) in receive_bitmap\n",
489262306a36Sopenharmony_ci		    drbd_conn_str(device->state.conn));
489362306a36Sopenharmony_ci	}
489462306a36Sopenharmony_ci	err = 0;
489562306a36Sopenharmony_ci
489662306a36Sopenharmony_ci out:
489762306a36Sopenharmony_ci	drbd_bm_unlock(device);
489862306a36Sopenharmony_ci	if (!err && device->state.conn == C_WF_BITMAP_S)
489962306a36Sopenharmony_ci		drbd_start_resync(device, C_SYNC_SOURCE);
490062306a36Sopenharmony_ci	return err;
490162306a36Sopenharmony_ci}
490262306a36Sopenharmony_ci
490362306a36Sopenharmony_cistatic int receive_skip(struct drbd_connection *connection, struct packet_info *pi)
490462306a36Sopenharmony_ci{
490562306a36Sopenharmony_ci	drbd_warn(connection, "skipping unknown optional packet type %d, l: %d!\n",
490662306a36Sopenharmony_ci		 pi->cmd, pi->size);
490762306a36Sopenharmony_ci
490862306a36Sopenharmony_ci	return ignore_remaining_packet(connection, pi);
490962306a36Sopenharmony_ci}
491062306a36Sopenharmony_ci
491162306a36Sopenharmony_cistatic int receive_UnplugRemote(struct drbd_connection *connection, struct packet_info *pi)
491262306a36Sopenharmony_ci{
491362306a36Sopenharmony_ci	/* Make sure we've acked all the TCP data associated
491462306a36Sopenharmony_ci	 * with the data requests being unplugged */
491562306a36Sopenharmony_ci	tcp_sock_set_quickack(connection->data.socket->sk, 2);
491662306a36Sopenharmony_ci	return 0;
491762306a36Sopenharmony_ci}
491862306a36Sopenharmony_ci
491962306a36Sopenharmony_cistatic int receive_out_of_sync(struct drbd_connection *connection, struct packet_info *pi)
492062306a36Sopenharmony_ci{
492162306a36Sopenharmony_ci	struct drbd_peer_device *peer_device;
492262306a36Sopenharmony_ci	struct drbd_device *device;
492362306a36Sopenharmony_ci	struct p_block_desc *p = pi->data;
492462306a36Sopenharmony_ci
492562306a36Sopenharmony_ci	peer_device = conn_peer_device(connection, pi->vnr);
492662306a36Sopenharmony_ci	if (!peer_device)
492762306a36Sopenharmony_ci		return -EIO;
492862306a36Sopenharmony_ci	device = peer_device->device;
492962306a36Sopenharmony_ci
493062306a36Sopenharmony_ci	switch (device->state.conn) {
493162306a36Sopenharmony_ci	case C_WF_SYNC_UUID:
493262306a36Sopenharmony_ci	case C_WF_BITMAP_T:
493362306a36Sopenharmony_ci	case C_BEHIND:
493462306a36Sopenharmony_ci			break;
493562306a36Sopenharmony_ci	default:
493662306a36Sopenharmony_ci		drbd_err(device, "ASSERT FAILED cstate = %s, expected: WFSyncUUID|WFBitMapT|Behind\n",
493762306a36Sopenharmony_ci				drbd_conn_str(device->state.conn));
493862306a36Sopenharmony_ci	}
493962306a36Sopenharmony_ci
494062306a36Sopenharmony_ci	drbd_set_out_of_sync(peer_device, be64_to_cpu(p->sector), be32_to_cpu(p->blksize));
494162306a36Sopenharmony_ci
494262306a36Sopenharmony_ci	return 0;
494362306a36Sopenharmony_ci}
494462306a36Sopenharmony_ci
494562306a36Sopenharmony_cistatic int receive_rs_deallocated(struct drbd_connection *connection, struct packet_info *pi)
494662306a36Sopenharmony_ci{
494762306a36Sopenharmony_ci	struct drbd_peer_device *peer_device;
494862306a36Sopenharmony_ci	struct p_block_desc *p = pi->data;
494962306a36Sopenharmony_ci	struct drbd_device *device;
495062306a36Sopenharmony_ci	sector_t sector;
495162306a36Sopenharmony_ci	int size, err = 0;
495262306a36Sopenharmony_ci
495362306a36Sopenharmony_ci	peer_device = conn_peer_device(connection, pi->vnr);
495462306a36Sopenharmony_ci	if (!peer_device)
495562306a36Sopenharmony_ci		return -EIO;
495662306a36Sopenharmony_ci	device = peer_device->device;
495762306a36Sopenharmony_ci
495862306a36Sopenharmony_ci	sector = be64_to_cpu(p->sector);
495962306a36Sopenharmony_ci	size = be32_to_cpu(p->blksize);
496062306a36Sopenharmony_ci
496162306a36Sopenharmony_ci	dec_rs_pending(peer_device);
496262306a36Sopenharmony_ci
496362306a36Sopenharmony_ci	if (get_ldev(device)) {
496462306a36Sopenharmony_ci		struct drbd_peer_request *peer_req;
496562306a36Sopenharmony_ci
496662306a36Sopenharmony_ci		peer_req = drbd_alloc_peer_req(peer_device, ID_SYNCER, sector,
496762306a36Sopenharmony_ci					       size, 0, GFP_NOIO);
496862306a36Sopenharmony_ci		if (!peer_req) {
496962306a36Sopenharmony_ci			put_ldev(device);
497062306a36Sopenharmony_ci			return -ENOMEM;
497162306a36Sopenharmony_ci		}
497262306a36Sopenharmony_ci
497362306a36Sopenharmony_ci		peer_req->w.cb = e_end_resync_block;
497462306a36Sopenharmony_ci		peer_req->opf = REQ_OP_DISCARD;
497562306a36Sopenharmony_ci		peer_req->submit_jif = jiffies;
497662306a36Sopenharmony_ci		peer_req->flags |= EE_TRIM;
497762306a36Sopenharmony_ci
497862306a36Sopenharmony_ci		spin_lock_irq(&device->resource->req_lock);
497962306a36Sopenharmony_ci		list_add_tail(&peer_req->w.list, &device->sync_ee);
498062306a36Sopenharmony_ci		spin_unlock_irq(&device->resource->req_lock);
498162306a36Sopenharmony_ci
498262306a36Sopenharmony_ci		atomic_add(pi->size >> 9, &device->rs_sect_ev);
498362306a36Sopenharmony_ci		err = drbd_submit_peer_request(peer_req);
498462306a36Sopenharmony_ci
498562306a36Sopenharmony_ci		if (err) {
498662306a36Sopenharmony_ci			spin_lock_irq(&device->resource->req_lock);
498762306a36Sopenharmony_ci			list_del(&peer_req->w.list);
498862306a36Sopenharmony_ci			spin_unlock_irq(&device->resource->req_lock);
498962306a36Sopenharmony_ci
499062306a36Sopenharmony_ci			drbd_free_peer_req(device, peer_req);
499162306a36Sopenharmony_ci			put_ldev(device);
499262306a36Sopenharmony_ci			err = 0;
499362306a36Sopenharmony_ci			goto fail;
499462306a36Sopenharmony_ci		}
499562306a36Sopenharmony_ci
499662306a36Sopenharmony_ci		inc_unacked(device);
499762306a36Sopenharmony_ci
499862306a36Sopenharmony_ci		/* No put_ldev() here. Gets called in drbd_endio_write_sec_final(),
499962306a36Sopenharmony_ci		   as well as drbd_rs_complete_io() */
500062306a36Sopenharmony_ci	} else {
500162306a36Sopenharmony_ci	fail:
500262306a36Sopenharmony_ci		drbd_rs_complete_io(device, sector);
500362306a36Sopenharmony_ci		drbd_send_ack_ex(peer_device, P_NEG_ACK, sector, size, ID_SYNCER);
500462306a36Sopenharmony_ci	}
500562306a36Sopenharmony_ci
500662306a36Sopenharmony_ci	atomic_add(size >> 9, &device->rs_sect_in);
500762306a36Sopenharmony_ci
500862306a36Sopenharmony_ci	return err;
500962306a36Sopenharmony_ci}
501062306a36Sopenharmony_ci
/*
 * struct data_cmd - dispatch entry for one packet type on the data socket
 *
 * Field order matters: the handler table may use positional initializers.
 */
struct data_cmd {
	/* non-zero: the packet may be larger than pkt_size */
	int expect_payload;
	/* size of the sub header drbdd() receives before calling fn */
	unsigned int pkt_size;
	/* handler; a non-zero return value makes drbdd() bail out */
	int (*fn)(struct drbd_connection *connection, struct packet_info *pi);
};
501662306a36Sopenharmony_ci
501762306a36Sopenharmony_cistatic struct data_cmd drbd_cmd_handler[] = {
501862306a36Sopenharmony_ci	[P_DATA]	    = { 1, sizeof(struct p_data), receive_Data },
501962306a36Sopenharmony_ci	[P_DATA_REPLY]	    = { 1, sizeof(struct p_data), receive_DataReply },
502062306a36Sopenharmony_ci	[P_RS_DATA_REPLY]   = { 1, sizeof(struct p_data), receive_RSDataReply } ,
502162306a36Sopenharmony_ci	[P_BARRIER]	    = { 0, sizeof(struct p_barrier), receive_Barrier } ,
502262306a36Sopenharmony_ci	[P_BITMAP]	    = { 1, 0, receive_bitmap } ,
502362306a36Sopenharmony_ci	[P_COMPRESSED_BITMAP] = { 1, 0, receive_bitmap } ,
502462306a36Sopenharmony_ci	[P_UNPLUG_REMOTE]   = { 0, 0, receive_UnplugRemote },
502562306a36Sopenharmony_ci	[P_DATA_REQUEST]    = { 0, sizeof(struct p_block_req), receive_DataRequest },
502662306a36Sopenharmony_ci	[P_RS_DATA_REQUEST] = { 0, sizeof(struct p_block_req), receive_DataRequest },
502762306a36Sopenharmony_ci	[P_SYNC_PARAM]	    = { 1, 0, receive_SyncParam },
502862306a36Sopenharmony_ci	[P_SYNC_PARAM89]    = { 1, 0, receive_SyncParam },
502962306a36Sopenharmony_ci	[P_PROTOCOL]        = { 1, sizeof(struct p_protocol), receive_protocol },
503062306a36Sopenharmony_ci	[P_UUIDS]	    = { 0, sizeof(struct p_uuids), receive_uuids },
503162306a36Sopenharmony_ci	[P_SIZES]	    = { 0, sizeof(struct p_sizes), receive_sizes },
503262306a36Sopenharmony_ci	[P_STATE]	    = { 0, sizeof(struct p_state), receive_state },
503362306a36Sopenharmony_ci	[P_STATE_CHG_REQ]   = { 0, sizeof(struct p_req_state), receive_req_state },
503462306a36Sopenharmony_ci	[P_SYNC_UUID]       = { 0, sizeof(struct p_rs_uuid), receive_sync_uuid },
503562306a36Sopenharmony_ci	[P_OV_REQUEST]      = { 0, sizeof(struct p_block_req), receive_DataRequest },
503662306a36Sopenharmony_ci	[P_OV_REPLY]        = { 1, sizeof(struct p_block_req), receive_DataRequest },
503762306a36Sopenharmony_ci	[P_CSUM_RS_REQUEST] = { 1, sizeof(struct p_block_req), receive_DataRequest },
503862306a36Sopenharmony_ci	[P_RS_THIN_REQ]     = { 0, sizeof(struct p_block_req), receive_DataRequest },
503962306a36Sopenharmony_ci	[P_DELAY_PROBE]     = { 0, sizeof(struct p_delay_probe93), receive_skip },
504062306a36Sopenharmony_ci	[P_OUT_OF_SYNC]     = { 0, sizeof(struct p_block_desc), receive_out_of_sync },
504162306a36Sopenharmony_ci	[P_CONN_ST_CHG_REQ] = { 0, sizeof(struct p_req_state), receive_req_conn_state },
504262306a36Sopenharmony_ci	[P_PROTOCOL_UPDATE] = { 1, sizeof(struct p_protocol), receive_protocol },
504362306a36Sopenharmony_ci	[P_TRIM]	    = { 0, sizeof(struct p_trim), receive_Data },
504462306a36Sopenharmony_ci	[P_ZEROES]	    = { 0, sizeof(struct p_trim), receive_Data },
504562306a36Sopenharmony_ci	[P_RS_DEALLOCATED]  = { 0, sizeof(struct p_block_desc), receive_rs_deallocated },
504662306a36Sopenharmony_ci};
504762306a36Sopenharmony_ci
504862306a36Sopenharmony_cistatic void drbdd(struct drbd_connection *connection)
504962306a36Sopenharmony_ci{
505062306a36Sopenharmony_ci	struct packet_info pi;
505162306a36Sopenharmony_ci	size_t shs; /* sub header size */
505262306a36Sopenharmony_ci	int err;
505362306a36Sopenharmony_ci
505462306a36Sopenharmony_ci	while (get_t_state(&connection->receiver) == RUNNING) {
505562306a36Sopenharmony_ci		struct data_cmd const *cmd;
505662306a36Sopenharmony_ci
505762306a36Sopenharmony_ci		drbd_thread_current_set_cpu(&connection->receiver);
505862306a36Sopenharmony_ci		update_receiver_timing_details(connection, drbd_recv_header_maybe_unplug);
505962306a36Sopenharmony_ci		if (drbd_recv_header_maybe_unplug(connection, &pi))
506062306a36Sopenharmony_ci			goto err_out;
506162306a36Sopenharmony_ci
506262306a36Sopenharmony_ci		cmd = &drbd_cmd_handler[pi.cmd];
506362306a36Sopenharmony_ci		if (unlikely(pi.cmd >= ARRAY_SIZE(drbd_cmd_handler) || !cmd->fn)) {
506462306a36Sopenharmony_ci			drbd_err(connection, "Unexpected data packet %s (0x%04x)",
506562306a36Sopenharmony_ci				 cmdname(pi.cmd), pi.cmd);
506662306a36Sopenharmony_ci			goto err_out;
506762306a36Sopenharmony_ci		}
506862306a36Sopenharmony_ci
506962306a36Sopenharmony_ci		shs = cmd->pkt_size;
507062306a36Sopenharmony_ci		if (pi.cmd == P_SIZES && connection->agreed_features & DRBD_FF_WSAME)
507162306a36Sopenharmony_ci			shs += sizeof(struct o_qlim);
507262306a36Sopenharmony_ci		if (pi.size > shs && !cmd->expect_payload) {
507362306a36Sopenharmony_ci			drbd_err(connection, "No payload expected %s l:%d\n",
507462306a36Sopenharmony_ci				 cmdname(pi.cmd), pi.size);
507562306a36Sopenharmony_ci			goto err_out;
507662306a36Sopenharmony_ci		}
507762306a36Sopenharmony_ci		if (pi.size < shs) {
507862306a36Sopenharmony_ci			drbd_err(connection, "%s: unexpected packet size, expected:%d received:%d\n",
507962306a36Sopenharmony_ci				 cmdname(pi.cmd), (int)shs, pi.size);
508062306a36Sopenharmony_ci			goto err_out;
508162306a36Sopenharmony_ci		}
508262306a36Sopenharmony_ci
508362306a36Sopenharmony_ci		if (shs) {
508462306a36Sopenharmony_ci			update_receiver_timing_details(connection, drbd_recv_all_warn);
508562306a36Sopenharmony_ci			err = drbd_recv_all_warn(connection, pi.data, shs);
508662306a36Sopenharmony_ci			if (err)
508762306a36Sopenharmony_ci				goto err_out;
508862306a36Sopenharmony_ci			pi.size -= shs;
508962306a36Sopenharmony_ci		}
509062306a36Sopenharmony_ci
509162306a36Sopenharmony_ci		update_receiver_timing_details(connection, cmd->fn);
509262306a36Sopenharmony_ci		err = cmd->fn(connection, &pi);
509362306a36Sopenharmony_ci		if (err) {
509462306a36Sopenharmony_ci			drbd_err(connection, "error receiving %s, e: %d l: %d!\n",
509562306a36Sopenharmony_ci				 cmdname(pi.cmd), err, pi.size);
509662306a36Sopenharmony_ci			goto err_out;
509762306a36Sopenharmony_ci		}
509862306a36Sopenharmony_ci	}
509962306a36Sopenharmony_ci	return;
510062306a36Sopenharmony_ci
510162306a36Sopenharmony_ci    err_out:
510262306a36Sopenharmony_ci	conn_request_state(connection, NS(conn, C_PROTOCOL_ERROR), CS_HARD);
510362306a36Sopenharmony_ci}
510462306a36Sopenharmony_ci
510562306a36Sopenharmony_cistatic void conn_disconnect(struct drbd_connection *connection)
510662306a36Sopenharmony_ci{
510762306a36Sopenharmony_ci	struct drbd_peer_device *peer_device;
510862306a36Sopenharmony_ci	enum drbd_conns oc;
510962306a36Sopenharmony_ci	int vnr;
511062306a36Sopenharmony_ci
511162306a36Sopenharmony_ci	if (connection->cstate == C_STANDALONE)
511262306a36Sopenharmony_ci		return;
511362306a36Sopenharmony_ci
511462306a36Sopenharmony_ci	/* We are about to start the cleanup after connection loss.
511562306a36Sopenharmony_ci	 * Make sure drbd_make_request knows about that.
511662306a36Sopenharmony_ci	 * Usually we should be in some network failure state already,
511762306a36Sopenharmony_ci	 * but just in case we are not, we fix it up here.
511862306a36Sopenharmony_ci	 */
511962306a36Sopenharmony_ci	conn_request_state(connection, NS(conn, C_NETWORK_FAILURE), CS_HARD);
512062306a36Sopenharmony_ci
512162306a36Sopenharmony_ci	/* ack_receiver does not clean up anything. it must not interfere, either */
512262306a36Sopenharmony_ci	drbd_thread_stop(&connection->ack_receiver);
512362306a36Sopenharmony_ci	if (connection->ack_sender) {
512462306a36Sopenharmony_ci		destroy_workqueue(connection->ack_sender);
512562306a36Sopenharmony_ci		connection->ack_sender = NULL;
512662306a36Sopenharmony_ci	}
512762306a36Sopenharmony_ci	drbd_free_sock(connection);
512862306a36Sopenharmony_ci
512962306a36Sopenharmony_ci	rcu_read_lock();
513062306a36Sopenharmony_ci	idr_for_each_entry(&connection->peer_devices, peer_device, vnr) {
513162306a36Sopenharmony_ci		struct drbd_device *device = peer_device->device;
513262306a36Sopenharmony_ci		kref_get(&device->kref);
513362306a36Sopenharmony_ci		rcu_read_unlock();
513462306a36Sopenharmony_ci		drbd_disconnected(peer_device);
513562306a36Sopenharmony_ci		kref_put(&device->kref, drbd_destroy_device);
513662306a36Sopenharmony_ci		rcu_read_lock();
513762306a36Sopenharmony_ci	}
513862306a36Sopenharmony_ci	rcu_read_unlock();
513962306a36Sopenharmony_ci
514062306a36Sopenharmony_ci	if (!list_empty(&connection->current_epoch->list))
514162306a36Sopenharmony_ci		drbd_err(connection, "ASSERTION FAILED: connection->current_epoch->list not empty\n");
514262306a36Sopenharmony_ci	/* ok, no more ee's on the fly, it is safe to reset the epoch_size */
514362306a36Sopenharmony_ci	atomic_set(&connection->current_epoch->epoch_size, 0);
514462306a36Sopenharmony_ci	connection->send.seen_any_write_yet = false;
514562306a36Sopenharmony_ci
514662306a36Sopenharmony_ci	drbd_info(connection, "Connection closed\n");
514762306a36Sopenharmony_ci
514862306a36Sopenharmony_ci	if (conn_highest_role(connection) == R_PRIMARY && conn_highest_pdsk(connection) >= D_UNKNOWN)
514962306a36Sopenharmony_ci		conn_try_outdate_peer_async(connection);
515062306a36Sopenharmony_ci
515162306a36Sopenharmony_ci	spin_lock_irq(&connection->resource->req_lock);
515262306a36Sopenharmony_ci	oc = connection->cstate;
515362306a36Sopenharmony_ci	if (oc >= C_UNCONNECTED)
515462306a36Sopenharmony_ci		_conn_request_state(connection, NS(conn, C_UNCONNECTED), CS_VERBOSE);
515562306a36Sopenharmony_ci
515662306a36Sopenharmony_ci	spin_unlock_irq(&connection->resource->req_lock);
515762306a36Sopenharmony_ci
515862306a36Sopenharmony_ci	if (oc == C_DISCONNECTING)
515962306a36Sopenharmony_ci		conn_request_state(connection, NS(conn, C_STANDALONE), CS_VERBOSE | CS_HARD);
516062306a36Sopenharmony_ci}
516162306a36Sopenharmony_ci
516262306a36Sopenharmony_cistatic int drbd_disconnected(struct drbd_peer_device *peer_device)
516362306a36Sopenharmony_ci{
516462306a36Sopenharmony_ci	struct drbd_device *device = peer_device->device;
516562306a36Sopenharmony_ci	unsigned int i;
516662306a36Sopenharmony_ci
516762306a36Sopenharmony_ci	/* wait for current activity to cease. */
516862306a36Sopenharmony_ci	spin_lock_irq(&device->resource->req_lock);
516962306a36Sopenharmony_ci	_drbd_wait_ee_list_empty(device, &device->active_ee);
517062306a36Sopenharmony_ci	_drbd_wait_ee_list_empty(device, &device->sync_ee);
517162306a36Sopenharmony_ci	_drbd_wait_ee_list_empty(device, &device->read_ee);
517262306a36Sopenharmony_ci	spin_unlock_irq(&device->resource->req_lock);
517362306a36Sopenharmony_ci
517462306a36Sopenharmony_ci	/* We do not have data structures that would allow us to
517562306a36Sopenharmony_ci	 * get the rs_pending_cnt down to 0 again.
517662306a36Sopenharmony_ci	 *  * On C_SYNC_TARGET we do not have any data structures describing
517762306a36Sopenharmony_ci	 *    the pending RSDataRequest's we have sent.
517862306a36Sopenharmony_ci	 *  * On C_SYNC_SOURCE there is no data structure that tracks
517962306a36Sopenharmony_ci	 *    the P_RS_DATA_REPLY blocks that we sent to the SyncTarget.
518062306a36Sopenharmony_ci	 *  And no, it is not the sum of the reference counts in the
518162306a36Sopenharmony_ci	 *  resync_LRU. The resync_LRU tracks the whole operation including
518262306a36Sopenharmony_ci	 *  the disk-IO, while the rs_pending_cnt only tracks the blocks
518362306a36Sopenharmony_ci	 *  on the fly. */
518462306a36Sopenharmony_ci	drbd_rs_cancel_all(device);
518562306a36Sopenharmony_ci	device->rs_total = 0;
518662306a36Sopenharmony_ci	device->rs_failed = 0;
518762306a36Sopenharmony_ci	atomic_set(&device->rs_pending_cnt, 0);
518862306a36Sopenharmony_ci	wake_up(&device->misc_wait);
518962306a36Sopenharmony_ci
519062306a36Sopenharmony_ci	del_timer_sync(&device->resync_timer);
519162306a36Sopenharmony_ci	resync_timer_fn(&device->resync_timer);
519262306a36Sopenharmony_ci
519362306a36Sopenharmony_ci	/* wait for all w_e_end_data_req, w_e_end_rsdata_req, w_send_barrier,
519462306a36Sopenharmony_ci	 * w_make_resync_request etc. which may still be on the worker queue
519562306a36Sopenharmony_ci	 * to be "canceled" */
519662306a36Sopenharmony_ci	drbd_flush_workqueue(&peer_device->connection->sender_work);
519762306a36Sopenharmony_ci
519862306a36Sopenharmony_ci	drbd_finish_peer_reqs(device);
519962306a36Sopenharmony_ci
520062306a36Sopenharmony_ci	/* This second workqueue flush is necessary, since drbd_finish_peer_reqs()
520162306a36Sopenharmony_ci	   might have issued a work again. The one before drbd_finish_peer_reqs() is
520262306a36Sopenharmony_ci	   necessary to reclain net_ee in drbd_finish_peer_reqs(). */
520362306a36Sopenharmony_ci	drbd_flush_workqueue(&peer_device->connection->sender_work);
520462306a36Sopenharmony_ci
520562306a36Sopenharmony_ci	/* need to do it again, drbd_finish_peer_reqs() may have populated it
520662306a36Sopenharmony_ci	 * again via drbd_try_clear_on_disk_bm(). */
520762306a36Sopenharmony_ci	drbd_rs_cancel_all(device);
520862306a36Sopenharmony_ci
520962306a36Sopenharmony_ci	kfree(device->p_uuid);
521062306a36Sopenharmony_ci	device->p_uuid = NULL;
521162306a36Sopenharmony_ci
521262306a36Sopenharmony_ci	if (!drbd_suspended(device))
521362306a36Sopenharmony_ci		tl_clear(peer_device->connection);
521462306a36Sopenharmony_ci
521562306a36Sopenharmony_ci	drbd_md_sync(device);
521662306a36Sopenharmony_ci
521762306a36Sopenharmony_ci	if (get_ldev(device)) {
521862306a36Sopenharmony_ci		drbd_bitmap_io(device, &drbd_bm_write_copy_pages,
521962306a36Sopenharmony_ci				"write from disconnected", BM_LOCKED_CHANGE_ALLOWED, NULL);
522062306a36Sopenharmony_ci		put_ldev(device);
522162306a36Sopenharmony_ci	}
522262306a36Sopenharmony_ci
522362306a36Sopenharmony_ci	/* tcp_close and release of sendpage pages can be deferred.  I don't
522462306a36Sopenharmony_ci	 * want to use SO_LINGER, because apparently it can be deferred for
522562306a36Sopenharmony_ci	 * more than 20 seconds (longest time I checked).
522662306a36Sopenharmony_ci	 *
522762306a36Sopenharmony_ci	 * Actually we don't care for exactly when the network stack does its
522862306a36Sopenharmony_ci	 * put_page(), but release our reference on these pages right here.
522962306a36Sopenharmony_ci	 */
523062306a36Sopenharmony_ci	i = drbd_free_peer_reqs(device, &device->net_ee);
523162306a36Sopenharmony_ci	if (i)
523262306a36Sopenharmony_ci		drbd_info(device, "net_ee not empty, killed %u entries\n", i);
523362306a36Sopenharmony_ci	i = atomic_read(&device->pp_in_use_by_net);
523462306a36Sopenharmony_ci	if (i)
523562306a36Sopenharmony_ci		drbd_info(device, "pp_in_use_by_net = %d, expected 0\n", i);
523662306a36Sopenharmony_ci	i = atomic_read(&device->pp_in_use);
523762306a36Sopenharmony_ci	if (i)
523862306a36Sopenharmony_ci		drbd_info(device, "pp_in_use = %d, expected 0\n", i);
523962306a36Sopenharmony_ci
524062306a36Sopenharmony_ci	D_ASSERT(device, list_empty(&device->read_ee));
524162306a36Sopenharmony_ci	D_ASSERT(device, list_empty(&device->active_ee));
524262306a36Sopenharmony_ci	D_ASSERT(device, list_empty(&device->sync_ee));
524362306a36Sopenharmony_ci	D_ASSERT(device, list_empty(&device->done_ee));
524462306a36Sopenharmony_ci
524562306a36Sopenharmony_ci	return 0;
524662306a36Sopenharmony_ci}
524762306a36Sopenharmony_ci
524862306a36Sopenharmony_ci/*
524962306a36Sopenharmony_ci * We support PRO_VERSION_MIN to PRO_VERSION_MAX. The protocol version
525062306a36Sopenharmony_ci * we can agree on is stored in agreed_pro_version.
525162306a36Sopenharmony_ci *
525262306a36Sopenharmony_ci * feature flags and the reserved array should be enough room for future
525362306a36Sopenharmony_ci * enhancements of the handshake protocol, and possible plugins...
525462306a36Sopenharmony_ci *
525562306a36Sopenharmony_ci * for now, they are expected to be zero, but ignored.
525662306a36Sopenharmony_ci */
525762306a36Sopenharmony_cistatic int drbd_send_features(struct drbd_connection *connection)
525862306a36Sopenharmony_ci{
525962306a36Sopenharmony_ci	struct drbd_socket *sock;
526062306a36Sopenharmony_ci	struct p_connection_features *p;
526162306a36Sopenharmony_ci
526262306a36Sopenharmony_ci	sock = &connection->data;
526362306a36Sopenharmony_ci	p = conn_prepare_command(connection, sock);
526462306a36Sopenharmony_ci	if (!p)
526562306a36Sopenharmony_ci		return -EIO;
526662306a36Sopenharmony_ci	memset(p, 0, sizeof(*p));
526762306a36Sopenharmony_ci	p->protocol_min = cpu_to_be32(PRO_VERSION_MIN);
526862306a36Sopenharmony_ci	p->protocol_max = cpu_to_be32(PRO_VERSION_MAX);
526962306a36Sopenharmony_ci	p->feature_flags = cpu_to_be32(PRO_FEATURES);
527062306a36Sopenharmony_ci	return conn_send_command(connection, sock, P_CONNECTION_FEATURES, sizeof(*p), NULL, 0);
527162306a36Sopenharmony_ci}
527262306a36Sopenharmony_ci
527362306a36Sopenharmony_ci/*
527462306a36Sopenharmony_ci * return values:
527562306a36Sopenharmony_ci *   1 yes, we have a valid connection
527662306a36Sopenharmony_ci *   0 oops, did not work out, please try again
527762306a36Sopenharmony_ci *  -1 peer talks different language,
527862306a36Sopenharmony_ci *     no point in trying again, please go standalone.
527962306a36Sopenharmony_ci */
528062306a36Sopenharmony_cistatic int drbd_do_features(struct drbd_connection *connection)
528162306a36Sopenharmony_ci{
528262306a36Sopenharmony_ci	/* ASSERT current == connection->receiver ... */
528362306a36Sopenharmony_ci	struct p_connection_features *p;
528462306a36Sopenharmony_ci	const int expect = sizeof(struct p_connection_features);
528562306a36Sopenharmony_ci	struct packet_info pi;
528662306a36Sopenharmony_ci	int err;
528762306a36Sopenharmony_ci
528862306a36Sopenharmony_ci	err = drbd_send_features(connection);
528962306a36Sopenharmony_ci	if (err)
529062306a36Sopenharmony_ci		return 0;
529162306a36Sopenharmony_ci
529262306a36Sopenharmony_ci	err = drbd_recv_header(connection, &pi);
529362306a36Sopenharmony_ci	if (err)
529462306a36Sopenharmony_ci		return 0;
529562306a36Sopenharmony_ci
529662306a36Sopenharmony_ci	if (pi.cmd != P_CONNECTION_FEATURES) {
529762306a36Sopenharmony_ci		drbd_err(connection, "expected ConnectionFeatures packet, received: %s (0x%04x)\n",
529862306a36Sopenharmony_ci			 cmdname(pi.cmd), pi.cmd);
529962306a36Sopenharmony_ci		return -1;
530062306a36Sopenharmony_ci	}
530162306a36Sopenharmony_ci
530262306a36Sopenharmony_ci	if (pi.size != expect) {
530362306a36Sopenharmony_ci		drbd_err(connection, "expected ConnectionFeatures length: %u, received: %u\n",
530462306a36Sopenharmony_ci		     expect, pi.size);
530562306a36Sopenharmony_ci		return -1;
530662306a36Sopenharmony_ci	}
530762306a36Sopenharmony_ci
530862306a36Sopenharmony_ci	p = pi.data;
530962306a36Sopenharmony_ci	err = drbd_recv_all_warn(connection, p, expect);
531062306a36Sopenharmony_ci	if (err)
531162306a36Sopenharmony_ci		return 0;
531262306a36Sopenharmony_ci
531362306a36Sopenharmony_ci	p->protocol_min = be32_to_cpu(p->protocol_min);
531462306a36Sopenharmony_ci	p->protocol_max = be32_to_cpu(p->protocol_max);
531562306a36Sopenharmony_ci	if (p->protocol_max == 0)
531662306a36Sopenharmony_ci		p->protocol_max = p->protocol_min;
531762306a36Sopenharmony_ci
531862306a36Sopenharmony_ci	if (PRO_VERSION_MAX < p->protocol_min ||
531962306a36Sopenharmony_ci	    PRO_VERSION_MIN > p->protocol_max)
532062306a36Sopenharmony_ci		goto incompat;
532162306a36Sopenharmony_ci
532262306a36Sopenharmony_ci	connection->agreed_pro_version = min_t(int, PRO_VERSION_MAX, p->protocol_max);
532362306a36Sopenharmony_ci	connection->agreed_features = PRO_FEATURES & be32_to_cpu(p->feature_flags);
532462306a36Sopenharmony_ci
532562306a36Sopenharmony_ci	drbd_info(connection, "Handshake successful: "
532662306a36Sopenharmony_ci	     "Agreed network protocol version %d\n", connection->agreed_pro_version);
532762306a36Sopenharmony_ci
532862306a36Sopenharmony_ci	drbd_info(connection, "Feature flags enabled on protocol level: 0x%x%s%s%s%s.\n",
532962306a36Sopenharmony_ci		  connection->agreed_features,
533062306a36Sopenharmony_ci		  connection->agreed_features & DRBD_FF_TRIM ? " TRIM" : "",
533162306a36Sopenharmony_ci		  connection->agreed_features & DRBD_FF_THIN_RESYNC ? " THIN_RESYNC" : "",
533262306a36Sopenharmony_ci		  connection->agreed_features & DRBD_FF_WSAME ? " WRITE_SAME" : "",
533362306a36Sopenharmony_ci		  connection->agreed_features & DRBD_FF_WZEROES ? " WRITE_ZEROES" :
533462306a36Sopenharmony_ci		  connection->agreed_features ? "" : " none");
533562306a36Sopenharmony_ci
533662306a36Sopenharmony_ci	return 1;
533762306a36Sopenharmony_ci
533862306a36Sopenharmony_ci incompat:
533962306a36Sopenharmony_ci	drbd_err(connection, "incompatible DRBD dialects: "
534062306a36Sopenharmony_ci	    "I support %d-%d, peer supports %d-%d\n",
534162306a36Sopenharmony_ci	    PRO_VERSION_MIN, PRO_VERSION_MAX,
534262306a36Sopenharmony_ci	    p->protocol_min, p->protocol_max);
534362306a36Sopenharmony_ci	return -1;
534462306a36Sopenharmony_ci}
534562306a36Sopenharmony_ci
#if !defined(CONFIG_CRYPTO_HMAC) && !defined(CONFIG_CRYPTO_HMAC_MODULE)
/*
 * Variant used when the kernel was configured without HMAC support:
 * logs two error messages and reports a permanent failure (-1).
 */
static int drbd_do_auth(struct drbd_connection *connection)
{
	drbd_err(connection, "This kernel was build without CONFIG_CRYPTO_HMAC.\n");
	drbd_err(connection, "You need to disable 'cram-hmac-alg' in drbd.conf.\n");
	return -1;
}
#else
#define CHALLENGE_LEN 64

/*
 * drbd_do_auth() - mutual challenge/response authentication with the peer
 * @connection: connection to authenticate; connection->cram_hmac_tfm is the
 *              keyed hash used, keyed with net_conf->shared_secret
 *
 * Sequence, all on the data socket:
 *  1. send CHALLENGE_LEN random bytes as P_AUTH_CHALLENGE
 *  2. receive the peer's P_AUTH_CHALLENGE (CHALLENGE_LEN..2*CHALLENGE_LEN
 *     bytes); it must differ from our own challenge
 *  3. send the digest of the peer's challenge as P_AUTH_RESPONSE
 *  4. receive the peer's P_AUTH_RESPONSE and compare it with the digest of
 *     our own challenge
 *
 * Return value:
 *	1 - auth succeeded,
 *	0 - failed, try again (network error),
 *	-1 - auth failed, don't try again.
 */
static int drbd_do_auth(struct drbd_connection *connection)
{
	struct drbd_socket *sock;
	char my_challenge[CHALLENGE_LEN];  /* 64 Bytes... */
	char *response = NULL;
	char *right_response = NULL;
	char *peers_ch = NULL;
	unsigned int key_len;
	char secret[SHARED_SECRET_MAX]; /* 64 byte */
	unsigned int resp_size;
	struct shash_desc *desc;
	struct packet_info pi;
	struct net_conf *nc;
	int err, rv;

	/* FIXME: Put the challenge/response into the preallocated socket buffer.  */

	/* Take a private copy of the secret; net_conf is only valid under RCU. */
	rcu_read_lock();
	nc = rcu_dereference(connection->net_conf);
	/* Bound the length by the destination buffer, so the copy can never
	 * overrun the stack even if the secret were not NUL terminated. */
	key_len = strnlen(nc->shared_secret, sizeof(secret));
	memcpy(secret, nc->shared_secret, key_len);
	rcu_read_unlock();

	desc = kmalloc(sizeof(struct shash_desc) +
		       crypto_shash_descsize(connection->cram_hmac_tfm),
		       GFP_KERNEL);
	if (!desc) {
		rv = -1;
		goto fail;
	}
	desc->tfm = connection->cram_hmac_tfm;

	rv = crypto_shash_setkey(connection->cram_hmac_tfm, (u8 *)secret, key_len);
	if (rv) {
		drbd_err(connection, "crypto_shash_setkey() failed with %d\n", rv);
		rv = -1;
		goto fail;
	}

	get_random_bytes(my_challenge, CHALLENGE_LEN);

	/* Step 1: send our challenge. */
	sock = &connection->data;
	if (!conn_prepare_command(connection, sock)) {
		rv = 0;
		goto fail;
	}
	/* rv becomes 0 ("try again") when sending failed. */
	rv = !conn_send_command(connection, sock, P_AUTH_CHALLENGE, 0,
				my_challenge, CHALLENGE_LEN);
	if (!rv)
		goto fail;

	/* Step 2: receive and sanity check the peer's challenge. */
	err = drbd_recv_header(connection, &pi);
	if (err) {
		rv = 0;
		goto fail;
	}

	if (pi.cmd != P_AUTH_CHALLENGE) {
		drbd_err(connection, "expected AuthChallenge packet, received: %s (0x%04x)\n",
			 cmdname(pi.cmd), pi.cmd);
		rv = -1;
		goto fail;
	}

	if (pi.size > CHALLENGE_LEN * 2) {
		drbd_err(connection, "expected AuthChallenge payload too big.\n");
		rv = -1;
		goto fail;
	}

	if (pi.size < CHALLENGE_LEN) {
		drbd_err(connection, "AuthChallenge payload too small.\n");
		rv = -1;
		goto fail;
	}

	peers_ch = kmalloc(pi.size, GFP_NOIO);
	if (!peers_ch) {
		rv = -1;
		goto fail;
	}

	err = drbd_recv_all_warn(connection, peers_ch, pi.size);
	if (err) {
		rv = 0;
		goto fail;
	}

	/* A peer echoing our own challenge would simply get our own
	 * response back from us; refuse that. */
	if (!memcmp(my_challenge, peers_ch, CHALLENGE_LEN)) {
		drbd_err(connection, "Peer presented the same challenge!\n");
		rv = -1;
		goto fail;
	}

	/* Step 3: answer the peer's challenge. */
	resp_size = crypto_shash_digestsize(connection->cram_hmac_tfm);
	response = kmalloc(resp_size, GFP_NOIO);
	if (!response) {
		rv = -1;
		goto fail;
	}

	rv = crypto_shash_digest(desc, peers_ch, pi.size, response);
	if (rv) {
		drbd_err(connection, "crypto_hash_digest() failed with %d\n", rv);
		rv = -1;
		goto fail;
	}

	if (!conn_prepare_command(connection, sock)) {
		rv = 0;
		goto fail;
	}
	rv = !conn_send_command(connection, sock, P_AUTH_RESPONSE, 0,
				response, resp_size);
	if (!rv)
		goto fail;

	/* Step 4: receive the peer's response; the response buffer is reused. */
	err = drbd_recv_header(connection, &pi);
	if (err) {
		rv = 0;
		goto fail;
	}

	if (pi.cmd != P_AUTH_RESPONSE) {
		drbd_err(connection, "expected AuthResponse packet, received: %s (0x%04x)\n",
			 cmdname(pi.cmd), pi.cmd);
		rv = 0;
		goto fail;
	}

	if (pi.size != resp_size) {
		drbd_err(connection, "expected AuthResponse payload of wrong size\n");
		rv = 0;
		goto fail;
	}

	err = drbd_recv_all_warn(connection, response , resp_size);
	if (err) {
		rv = 0;
		goto fail;
	}

	right_response = kmalloc(resp_size, GFP_NOIO);
	if (!right_response) {
		rv = -1;
		goto fail;
	}

	rv = crypto_shash_digest(desc, my_challenge, CHALLENGE_LEN,
				 right_response);
	if (rv) {
		drbd_err(connection, "crypto_hash_digest() failed with %d\n", rv);
		rv = -1;
		goto fail;
	}

	/* NOTE(review): memcmp() is not constant-time.  crypto_memneq() would
	 * be the usual choice for comparing MACs, but needs an additional
	 * include; left unchanged here. */
	rv = !memcmp(response, right_response, resp_size);

	if (rv)
		drbd_info(connection, "Peer authenticated using %d bytes HMAC\n",
		     resp_size);
	else
		rv = -1;

 fail:
	/* Do not leave a copy of the shared secret behind on the stack. */
	memzero_explicit(secret, sizeof(secret));
	kfree(peers_ch);
	kfree(response);
	kfree(right_response);
	if (desc) {
		shash_desc_zero(desc);
		kfree(desc);
	}

	return rv;
}
#endif
553862306a36Sopenharmony_ci
553962306a36Sopenharmony_ciint drbd_receiver(struct drbd_thread *thi)
554062306a36Sopenharmony_ci{
554162306a36Sopenharmony_ci	struct drbd_connection *connection = thi->connection;
554262306a36Sopenharmony_ci	int h;
554362306a36Sopenharmony_ci
554462306a36Sopenharmony_ci	drbd_info(connection, "receiver (re)started\n");
554562306a36Sopenharmony_ci
554662306a36Sopenharmony_ci	do {
554762306a36Sopenharmony_ci		h = conn_connect(connection);
554862306a36Sopenharmony_ci		if (h == 0) {
554962306a36Sopenharmony_ci			conn_disconnect(connection);
555062306a36Sopenharmony_ci			schedule_timeout_interruptible(HZ);
555162306a36Sopenharmony_ci		}
555262306a36Sopenharmony_ci		if (h == -1) {
555362306a36Sopenharmony_ci			drbd_warn(connection, "Discarding network configuration.\n");
555462306a36Sopenharmony_ci			conn_request_state(connection, NS(conn, C_DISCONNECTING), CS_HARD);
555562306a36Sopenharmony_ci		}
555662306a36Sopenharmony_ci	} while (h == 0);
555762306a36Sopenharmony_ci
555862306a36Sopenharmony_ci	if (h > 0) {
555962306a36Sopenharmony_ci		blk_start_plug(&connection->receiver_plug);
556062306a36Sopenharmony_ci		drbdd(connection);
556162306a36Sopenharmony_ci		blk_finish_plug(&connection->receiver_plug);
556262306a36Sopenharmony_ci	}
556362306a36Sopenharmony_ci
556462306a36Sopenharmony_ci	conn_disconnect(connection);
556562306a36Sopenharmony_ci
556662306a36Sopenharmony_ci	drbd_info(connection, "receiver terminated\n");
556762306a36Sopenharmony_ci	return 0;
556862306a36Sopenharmony_ci}
556962306a36Sopenharmony_ci
557062306a36Sopenharmony_ci/* ********* acknowledge sender ******** */
557162306a36Sopenharmony_ci
557262306a36Sopenharmony_cistatic int got_conn_RqSReply(struct drbd_connection *connection, struct packet_info *pi)
557362306a36Sopenharmony_ci{
557462306a36Sopenharmony_ci	struct p_req_state_reply *p = pi->data;
557562306a36Sopenharmony_ci	int retcode = be32_to_cpu(p->retcode);
557662306a36Sopenharmony_ci
557762306a36Sopenharmony_ci	if (retcode >= SS_SUCCESS) {
557862306a36Sopenharmony_ci		set_bit(CONN_WD_ST_CHG_OKAY, &connection->flags);
557962306a36Sopenharmony_ci	} else {
558062306a36Sopenharmony_ci		set_bit(CONN_WD_ST_CHG_FAIL, &connection->flags);
558162306a36Sopenharmony_ci		drbd_err(connection, "Requested state change failed by peer: %s (%d)\n",
558262306a36Sopenharmony_ci			 drbd_set_st_err_str(retcode), retcode);
558362306a36Sopenharmony_ci	}
558462306a36Sopenharmony_ci	wake_up(&connection->ping_wait);
558562306a36Sopenharmony_ci
558662306a36Sopenharmony_ci	return 0;
558762306a36Sopenharmony_ci}
558862306a36Sopenharmony_ci
558962306a36Sopenharmony_cistatic int got_RqSReply(struct drbd_connection *connection, struct packet_info *pi)
559062306a36Sopenharmony_ci{
559162306a36Sopenharmony_ci	struct drbd_peer_device *peer_device;
559262306a36Sopenharmony_ci	struct drbd_device *device;
559362306a36Sopenharmony_ci	struct p_req_state_reply *p = pi->data;
559462306a36Sopenharmony_ci	int retcode = be32_to_cpu(p->retcode);
559562306a36Sopenharmony_ci
559662306a36Sopenharmony_ci	peer_device = conn_peer_device(connection, pi->vnr);
559762306a36Sopenharmony_ci	if (!peer_device)
559862306a36Sopenharmony_ci		return -EIO;
559962306a36Sopenharmony_ci	device = peer_device->device;
560062306a36Sopenharmony_ci
560162306a36Sopenharmony_ci	if (test_bit(CONN_WD_ST_CHG_REQ, &connection->flags)) {
560262306a36Sopenharmony_ci		D_ASSERT(device, connection->agreed_pro_version < 100);
560362306a36Sopenharmony_ci		return got_conn_RqSReply(connection, pi);
560462306a36Sopenharmony_ci	}
560562306a36Sopenharmony_ci
560662306a36Sopenharmony_ci	if (retcode >= SS_SUCCESS) {
560762306a36Sopenharmony_ci		set_bit(CL_ST_CHG_SUCCESS, &device->flags);
560862306a36Sopenharmony_ci	} else {
560962306a36Sopenharmony_ci		set_bit(CL_ST_CHG_FAIL, &device->flags);
561062306a36Sopenharmony_ci		drbd_err(device, "Requested state change failed by peer: %s (%d)\n",
561162306a36Sopenharmony_ci			drbd_set_st_err_str(retcode), retcode);
561262306a36Sopenharmony_ci	}
561362306a36Sopenharmony_ci	wake_up(&device->state_wait);
561462306a36Sopenharmony_ci
561562306a36Sopenharmony_ci	return 0;
561662306a36Sopenharmony_ci}
561762306a36Sopenharmony_ci
/*
 * got_Ping() - answer a ping from the peer
 * @connection: connection the ping arrived on
 * @pi: packet info (not used)
 *
 * Returns the result of drbd_send_ping_ack().
 */
static int got_Ping(struct drbd_connection *connection, struct packet_info *pi)
{
	int err = drbd_send_ping_ack(connection);

	return err;
}
562362306a36Sopenharmony_ci
562462306a36Sopenharmony_cistatic int got_PingAck(struct drbd_connection *connection, struct packet_info *pi)
562562306a36Sopenharmony_ci{
562662306a36Sopenharmony_ci	/* restore idle timeout */
562762306a36Sopenharmony_ci	connection->meta.socket->sk->sk_rcvtimeo = connection->net_conf->ping_int*HZ;
562862306a36Sopenharmony_ci	if (!test_and_set_bit(GOT_PING_ACK, &connection->flags))
562962306a36Sopenharmony_ci		wake_up(&connection->ping_wait);
563062306a36Sopenharmony_ci
563162306a36Sopenharmony_ci	return 0;
563262306a36Sopenharmony_ci}
563362306a36Sopenharmony_ci
563462306a36Sopenharmony_cistatic int got_IsInSync(struct drbd_connection *connection, struct packet_info *pi)
563562306a36Sopenharmony_ci{
563662306a36Sopenharmony_ci	struct drbd_peer_device *peer_device;
563762306a36Sopenharmony_ci	struct drbd_device *device;
563862306a36Sopenharmony_ci	struct p_block_ack *p = pi->data;
563962306a36Sopenharmony_ci	sector_t sector = be64_to_cpu(p->sector);
564062306a36Sopenharmony_ci	int blksize = be32_to_cpu(p->blksize);
564162306a36Sopenharmony_ci
564262306a36Sopenharmony_ci	peer_device = conn_peer_device(connection, pi->vnr);
564362306a36Sopenharmony_ci	if (!peer_device)
564462306a36Sopenharmony_ci		return -EIO;
564562306a36Sopenharmony_ci	device = peer_device->device;
564662306a36Sopenharmony_ci
564762306a36Sopenharmony_ci	D_ASSERT(device, peer_device->connection->agreed_pro_version >= 89);
564862306a36Sopenharmony_ci
564962306a36Sopenharmony_ci	update_peer_seq(peer_device, be32_to_cpu(p->seq_num));
565062306a36Sopenharmony_ci
565162306a36Sopenharmony_ci	if (get_ldev(device)) {
565262306a36Sopenharmony_ci		drbd_rs_complete_io(device, sector);
565362306a36Sopenharmony_ci		drbd_set_in_sync(peer_device, sector, blksize);
565462306a36Sopenharmony_ci		/* rs_same_csums is supposed to count in units of BM_BLOCK_SIZE */
565562306a36Sopenharmony_ci		device->rs_same_csum += (blksize >> BM_BLOCK_SHIFT);
565662306a36Sopenharmony_ci		put_ldev(device);
565762306a36Sopenharmony_ci	}
565862306a36Sopenharmony_ci	dec_rs_pending(peer_device);
565962306a36Sopenharmony_ci	atomic_add(blksize >> 9, &device->rs_sect_in);
566062306a36Sopenharmony_ci
566162306a36Sopenharmony_ci	return 0;
566262306a36Sopenharmony_ci}
566362306a36Sopenharmony_ci
566462306a36Sopenharmony_cistatic int
566562306a36Sopenharmony_civalidate_req_change_req_state(struct drbd_peer_device *peer_device, u64 id, sector_t sector,
566662306a36Sopenharmony_ci			      struct rb_root *root, const char *func,
566762306a36Sopenharmony_ci			      enum drbd_req_event what, bool missing_ok)
566862306a36Sopenharmony_ci{
566962306a36Sopenharmony_ci	struct drbd_device *device = peer_device->device;
567062306a36Sopenharmony_ci	struct drbd_request *req;
567162306a36Sopenharmony_ci	struct bio_and_error m;
567262306a36Sopenharmony_ci
567362306a36Sopenharmony_ci	spin_lock_irq(&device->resource->req_lock);
567462306a36Sopenharmony_ci	req = find_request(device, root, id, sector, missing_ok, func);
567562306a36Sopenharmony_ci	if (unlikely(!req)) {
567662306a36Sopenharmony_ci		spin_unlock_irq(&device->resource->req_lock);
567762306a36Sopenharmony_ci		return -EIO;
567862306a36Sopenharmony_ci	}
567962306a36Sopenharmony_ci	__req_mod(req, what, peer_device, &m);
568062306a36Sopenharmony_ci	spin_unlock_irq(&device->resource->req_lock);
568162306a36Sopenharmony_ci
568262306a36Sopenharmony_ci	if (m.bio)
568362306a36Sopenharmony_ci		complete_master_bio(device, &m);
568462306a36Sopenharmony_ci	return 0;
568562306a36Sopenharmony_ci}
568662306a36Sopenharmony_ci
568762306a36Sopenharmony_cistatic int got_BlockAck(struct drbd_connection *connection, struct packet_info *pi)
568862306a36Sopenharmony_ci{
568962306a36Sopenharmony_ci	struct drbd_peer_device *peer_device;
569062306a36Sopenharmony_ci	struct drbd_device *device;
569162306a36Sopenharmony_ci	struct p_block_ack *p = pi->data;
569262306a36Sopenharmony_ci	sector_t sector = be64_to_cpu(p->sector);
569362306a36Sopenharmony_ci	int blksize = be32_to_cpu(p->blksize);
569462306a36Sopenharmony_ci	enum drbd_req_event what;
569562306a36Sopenharmony_ci
569662306a36Sopenharmony_ci	peer_device = conn_peer_device(connection, pi->vnr);
569762306a36Sopenharmony_ci	if (!peer_device)
569862306a36Sopenharmony_ci		return -EIO;
569962306a36Sopenharmony_ci	device = peer_device->device;
570062306a36Sopenharmony_ci
570162306a36Sopenharmony_ci	update_peer_seq(peer_device, be32_to_cpu(p->seq_num));
570262306a36Sopenharmony_ci
570362306a36Sopenharmony_ci	if (p->block_id == ID_SYNCER) {
570462306a36Sopenharmony_ci		drbd_set_in_sync(peer_device, sector, blksize);
570562306a36Sopenharmony_ci		dec_rs_pending(peer_device);
570662306a36Sopenharmony_ci		return 0;
570762306a36Sopenharmony_ci	}
570862306a36Sopenharmony_ci	switch (pi->cmd) {
570962306a36Sopenharmony_ci	case P_RS_WRITE_ACK:
571062306a36Sopenharmony_ci		what = WRITE_ACKED_BY_PEER_AND_SIS;
571162306a36Sopenharmony_ci		break;
571262306a36Sopenharmony_ci	case P_WRITE_ACK:
571362306a36Sopenharmony_ci		what = WRITE_ACKED_BY_PEER;
571462306a36Sopenharmony_ci		break;
571562306a36Sopenharmony_ci	case P_RECV_ACK:
571662306a36Sopenharmony_ci		what = RECV_ACKED_BY_PEER;
571762306a36Sopenharmony_ci		break;
571862306a36Sopenharmony_ci	case P_SUPERSEDED:
571962306a36Sopenharmony_ci		what = CONFLICT_RESOLVED;
572062306a36Sopenharmony_ci		break;
572162306a36Sopenharmony_ci	case P_RETRY_WRITE:
572262306a36Sopenharmony_ci		what = POSTPONE_WRITE;
572362306a36Sopenharmony_ci		break;
572462306a36Sopenharmony_ci	default:
572562306a36Sopenharmony_ci		BUG();
572662306a36Sopenharmony_ci	}
572762306a36Sopenharmony_ci
572862306a36Sopenharmony_ci	return validate_req_change_req_state(peer_device, p->block_id, sector,
572962306a36Sopenharmony_ci					     &device->write_requests, __func__,
573062306a36Sopenharmony_ci					     what, false);
573162306a36Sopenharmony_ci}
573262306a36Sopenharmony_ci
573362306a36Sopenharmony_cistatic int got_NegAck(struct drbd_connection *connection, struct packet_info *pi)
573462306a36Sopenharmony_ci{
573562306a36Sopenharmony_ci	struct drbd_peer_device *peer_device;
573662306a36Sopenharmony_ci	struct drbd_device *device;
573762306a36Sopenharmony_ci	struct p_block_ack *p = pi->data;
573862306a36Sopenharmony_ci	sector_t sector = be64_to_cpu(p->sector);
573962306a36Sopenharmony_ci	int size = be32_to_cpu(p->blksize);
574062306a36Sopenharmony_ci	int err;
574162306a36Sopenharmony_ci
574262306a36Sopenharmony_ci	peer_device = conn_peer_device(connection, pi->vnr);
574362306a36Sopenharmony_ci	if (!peer_device)
574462306a36Sopenharmony_ci		return -EIO;
574562306a36Sopenharmony_ci	device = peer_device->device;
574662306a36Sopenharmony_ci
574762306a36Sopenharmony_ci	update_peer_seq(peer_device, be32_to_cpu(p->seq_num));
574862306a36Sopenharmony_ci
574962306a36Sopenharmony_ci	if (p->block_id == ID_SYNCER) {
575062306a36Sopenharmony_ci		dec_rs_pending(peer_device);
575162306a36Sopenharmony_ci		drbd_rs_failed_io(peer_device, sector, size);
575262306a36Sopenharmony_ci		return 0;
575362306a36Sopenharmony_ci	}
575462306a36Sopenharmony_ci
575562306a36Sopenharmony_ci	err = validate_req_change_req_state(peer_device, p->block_id, sector,
575662306a36Sopenharmony_ci					    &device->write_requests, __func__,
575762306a36Sopenharmony_ci					    NEG_ACKED, true);
575862306a36Sopenharmony_ci	if (err) {
575962306a36Sopenharmony_ci		/* Protocol A has no P_WRITE_ACKs, but has P_NEG_ACKs.
576062306a36Sopenharmony_ci		   The master bio might already be completed, therefore the
576162306a36Sopenharmony_ci		   request is no longer in the collision hash. */
576262306a36Sopenharmony_ci		/* In Protocol B we might already have got a P_RECV_ACK
576362306a36Sopenharmony_ci		   but then get a P_NEG_ACK afterwards. */
576462306a36Sopenharmony_ci		drbd_set_out_of_sync(peer_device, sector, size);
576562306a36Sopenharmony_ci	}
576662306a36Sopenharmony_ci	return 0;
576762306a36Sopenharmony_ci}
576862306a36Sopenharmony_ci
576962306a36Sopenharmony_cistatic int got_NegDReply(struct drbd_connection *connection, struct packet_info *pi)
577062306a36Sopenharmony_ci{
577162306a36Sopenharmony_ci	struct drbd_peer_device *peer_device;
577262306a36Sopenharmony_ci	struct drbd_device *device;
577362306a36Sopenharmony_ci	struct p_block_ack *p = pi->data;
577462306a36Sopenharmony_ci	sector_t sector = be64_to_cpu(p->sector);
577562306a36Sopenharmony_ci
577662306a36Sopenharmony_ci	peer_device = conn_peer_device(connection, pi->vnr);
577762306a36Sopenharmony_ci	if (!peer_device)
577862306a36Sopenharmony_ci		return -EIO;
577962306a36Sopenharmony_ci	device = peer_device->device;
578062306a36Sopenharmony_ci
578162306a36Sopenharmony_ci	update_peer_seq(peer_device, be32_to_cpu(p->seq_num));
578262306a36Sopenharmony_ci
578362306a36Sopenharmony_ci	drbd_err(device, "Got NegDReply; Sector %llus, len %u.\n",
578462306a36Sopenharmony_ci	    (unsigned long long)sector, be32_to_cpu(p->blksize));
578562306a36Sopenharmony_ci
578662306a36Sopenharmony_ci	return validate_req_change_req_state(peer_device, p->block_id, sector,
578762306a36Sopenharmony_ci					     &device->read_requests, __func__,
578862306a36Sopenharmony_ci					     NEG_ACKED, false);
578962306a36Sopenharmony_ci}
579062306a36Sopenharmony_ci
579162306a36Sopenharmony_cistatic int got_NegRSDReply(struct drbd_connection *connection, struct packet_info *pi)
579262306a36Sopenharmony_ci{
579362306a36Sopenharmony_ci	struct drbd_peer_device *peer_device;
579462306a36Sopenharmony_ci	struct drbd_device *device;
579562306a36Sopenharmony_ci	sector_t sector;
579662306a36Sopenharmony_ci	int size;
579762306a36Sopenharmony_ci	struct p_block_ack *p = pi->data;
579862306a36Sopenharmony_ci
579962306a36Sopenharmony_ci	peer_device = conn_peer_device(connection, pi->vnr);
580062306a36Sopenharmony_ci	if (!peer_device)
580162306a36Sopenharmony_ci		return -EIO;
580262306a36Sopenharmony_ci	device = peer_device->device;
580362306a36Sopenharmony_ci
580462306a36Sopenharmony_ci	sector = be64_to_cpu(p->sector);
580562306a36Sopenharmony_ci	size = be32_to_cpu(p->blksize);
580662306a36Sopenharmony_ci
580762306a36Sopenharmony_ci	update_peer_seq(peer_device, be32_to_cpu(p->seq_num));
580862306a36Sopenharmony_ci
580962306a36Sopenharmony_ci	dec_rs_pending(peer_device);
581062306a36Sopenharmony_ci
581162306a36Sopenharmony_ci	if (get_ldev_if_state(device, D_FAILED)) {
581262306a36Sopenharmony_ci		drbd_rs_complete_io(device, sector);
581362306a36Sopenharmony_ci		switch (pi->cmd) {
581462306a36Sopenharmony_ci		case P_NEG_RS_DREPLY:
581562306a36Sopenharmony_ci			drbd_rs_failed_io(peer_device, sector, size);
581662306a36Sopenharmony_ci			break;
581762306a36Sopenharmony_ci		case P_RS_CANCEL:
581862306a36Sopenharmony_ci			break;
581962306a36Sopenharmony_ci		default:
582062306a36Sopenharmony_ci			BUG();
582162306a36Sopenharmony_ci		}
582262306a36Sopenharmony_ci		put_ldev(device);
582362306a36Sopenharmony_ci	}
582462306a36Sopenharmony_ci
582562306a36Sopenharmony_ci	return 0;
582662306a36Sopenharmony_ci}
582762306a36Sopenharmony_ci
582862306a36Sopenharmony_cistatic int got_BarrierAck(struct drbd_connection *connection, struct packet_info *pi)
582962306a36Sopenharmony_ci{
583062306a36Sopenharmony_ci	struct p_barrier_ack *p = pi->data;
583162306a36Sopenharmony_ci	struct drbd_peer_device *peer_device;
583262306a36Sopenharmony_ci	int vnr;
583362306a36Sopenharmony_ci
583462306a36Sopenharmony_ci	tl_release(connection, p->barrier, be32_to_cpu(p->set_size));
583562306a36Sopenharmony_ci
583662306a36Sopenharmony_ci	rcu_read_lock();
583762306a36Sopenharmony_ci	idr_for_each_entry(&connection->peer_devices, peer_device, vnr) {
583862306a36Sopenharmony_ci		struct drbd_device *device = peer_device->device;
583962306a36Sopenharmony_ci
584062306a36Sopenharmony_ci		if (device->state.conn == C_AHEAD &&
584162306a36Sopenharmony_ci		    atomic_read(&device->ap_in_flight) == 0 &&
584262306a36Sopenharmony_ci		    !test_and_set_bit(AHEAD_TO_SYNC_SOURCE, &device->flags)) {
584362306a36Sopenharmony_ci			device->start_resync_timer.expires = jiffies + HZ;
584462306a36Sopenharmony_ci			add_timer(&device->start_resync_timer);
584562306a36Sopenharmony_ci		}
584662306a36Sopenharmony_ci	}
584762306a36Sopenharmony_ci	rcu_read_unlock();
584862306a36Sopenharmony_ci
584962306a36Sopenharmony_ci	return 0;
585062306a36Sopenharmony_ci}
585162306a36Sopenharmony_ci
585262306a36Sopenharmony_cistatic int got_OVResult(struct drbd_connection *connection, struct packet_info *pi)
585362306a36Sopenharmony_ci{
585462306a36Sopenharmony_ci	struct drbd_peer_device *peer_device;
585562306a36Sopenharmony_ci	struct drbd_device *device;
585662306a36Sopenharmony_ci	struct p_block_ack *p = pi->data;
585762306a36Sopenharmony_ci	struct drbd_device_work *dw;
585862306a36Sopenharmony_ci	sector_t sector;
585962306a36Sopenharmony_ci	int size;
586062306a36Sopenharmony_ci
586162306a36Sopenharmony_ci	peer_device = conn_peer_device(connection, pi->vnr);
586262306a36Sopenharmony_ci	if (!peer_device)
586362306a36Sopenharmony_ci		return -EIO;
586462306a36Sopenharmony_ci	device = peer_device->device;
586562306a36Sopenharmony_ci
586662306a36Sopenharmony_ci	sector = be64_to_cpu(p->sector);
586762306a36Sopenharmony_ci	size = be32_to_cpu(p->blksize);
586862306a36Sopenharmony_ci
586962306a36Sopenharmony_ci	update_peer_seq(peer_device, be32_to_cpu(p->seq_num));
587062306a36Sopenharmony_ci
587162306a36Sopenharmony_ci	if (be64_to_cpu(p->block_id) == ID_OUT_OF_SYNC)
587262306a36Sopenharmony_ci		drbd_ov_out_of_sync_found(peer_device, sector, size);
587362306a36Sopenharmony_ci	else
587462306a36Sopenharmony_ci		ov_out_of_sync_print(peer_device);
587562306a36Sopenharmony_ci
587662306a36Sopenharmony_ci	if (!get_ldev(device))
587762306a36Sopenharmony_ci		return 0;
587862306a36Sopenharmony_ci
587962306a36Sopenharmony_ci	drbd_rs_complete_io(device, sector);
588062306a36Sopenharmony_ci	dec_rs_pending(peer_device);
588162306a36Sopenharmony_ci
588262306a36Sopenharmony_ci	--device->ov_left;
588362306a36Sopenharmony_ci
588462306a36Sopenharmony_ci	/* let's advance progress step marks only for every other megabyte */
588562306a36Sopenharmony_ci	if ((device->ov_left & 0x200) == 0x200)
588662306a36Sopenharmony_ci		drbd_advance_rs_marks(peer_device, device->ov_left);
588762306a36Sopenharmony_ci
588862306a36Sopenharmony_ci	if (device->ov_left == 0) {
588962306a36Sopenharmony_ci		dw = kmalloc(sizeof(*dw), GFP_NOIO);
589062306a36Sopenharmony_ci		if (dw) {
589162306a36Sopenharmony_ci			dw->w.cb = w_ov_finished;
589262306a36Sopenharmony_ci			dw->device = device;
589362306a36Sopenharmony_ci			drbd_queue_work(&peer_device->connection->sender_work, &dw->w);
589462306a36Sopenharmony_ci		} else {
589562306a36Sopenharmony_ci			drbd_err(device, "kmalloc(dw) failed.");
589662306a36Sopenharmony_ci			ov_out_of_sync_print(peer_device);
589762306a36Sopenharmony_ci			drbd_resync_finished(peer_device);
589862306a36Sopenharmony_ci		}
589962306a36Sopenharmony_ci	}
590062306a36Sopenharmony_ci	put_ldev(device);
590162306a36Sopenharmony_ci	return 0;
590262306a36Sopenharmony_ci}
590362306a36Sopenharmony_ci
/*
 * got_skip() - meta socket handler that accepts a packet and ignores it.
 *
 * Neither argument is looked at; the packet counts as handled successfully.
 * Always returns 0.
 */
static int got_skip(struct drbd_connection *connection, struct packet_info *pi)
{
	/* Intentionally empty: the payload has already been read by the caller. */
	return 0;
}
590862306a36Sopenharmony_ci
590962306a36Sopenharmony_cistruct meta_sock_cmd {
591062306a36Sopenharmony_ci	size_t pkt_size;
591162306a36Sopenharmony_ci	int (*fn)(struct drbd_connection *connection, struct packet_info *);
591262306a36Sopenharmony_ci};
591362306a36Sopenharmony_ci
591462306a36Sopenharmony_cistatic void set_rcvtimeo(struct drbd_connection *connection, bool ping_timeout)
591562306a36Sopenharmony_ci{
591662306a36Sopenharmony_ci	long t;
591762306a36Sopenharmony_ci	struct net_conf *nc;
591862306a36Sopenharmony_ci
591962306a36Sopenharmony_ci	rcu_read_lock();
592062306a36Sopenharmony_ci	nc = rcu_dereference(connection->net_conf);
592162306a36Sopenharmony_ci	t = ping_timeout ? nc->ping_timeo : nc->ping_int;
592262306a36Sopenharmony_ci	rcu_read_unlock();
592362306a36Sopenharmony_ci
592462306a36Sopenharmony_ci	t *= HZ;
592562306a36Sopenharmony_ci	if (ping_timeout)
592662306a36Sopenharmony_ci		t /= 10;
592762306a36Sopenharmony_ci
592862306a36Sopenharmony_ci	connection->meta.socket->sk->sk_rcvtimeo = t;
592962306a36Sopenharmony_ci}
593062306a36Sopenharmony_ci
593162306a36Sopenharmony_cistatic void set_ping_timeout(struct drbd_connection *connection)
593262306a36Sopenharmony_ci{
593362306a36Sopenharmony_ci	set_rcvtimeo(connection, 1);
593462306a36Sopenharmony_ci}
593562306a36Sopenharmony_ci
593662306a36Sopenharmony_cistatic void set_idle_timeout(struct drbd_connection *connection)
593762306a36Sopenharmony_ci{
593862306a36Sopenharmony_ci	set_rcvtimeo(connection, 0);
593962306a36Sopenharmony_ci}
594062306a36Sopenharmony_ci
594162306a36Sopenharmony_cistatic struct meta_sock_cmd ack_receiver_tbl[] = {
594262306a36Sopenharmony_ci	[P_PING]	    = { 0, got_Ping },
594362306a36Sopenharmony_ci	[P_PING_ACK]	    = { 0, got_PingAck },
594462306a36Sopenharmony_ci	[P_RECV_ACK]	    = { sizeof(struct p_block_ack), got_BlockAck },
594562306a36Sopenharmony_ci	[P_WRITE_ACK]	    = { sizeof(struct p_block_ack), got_BlockAck },
594662306a36Sopenharmony_ci	[P_RS_WRITE_ACK]    = { sizeof(struct p_block_ack), got_BlockAck },
594762306a36Sopenharmony_ci	[P_SUPERSEDED]   = { sizeof(struct p_block_ack), got_BlockAck },
594862306a36Sopenharmony_ci	[P_NEG_ACK]	    = { sizeof(struct p_block_ack), got_NegAck },
594962306a36Sopenharmony_ci	[P_NEG_DREPLY]	    = { sizeof(struct p_block_ack), got_NegDReply },
595062306a36Sopenharmony_ci	[P_NEG_RS_DREPLY]   = { sizeof(struct p_block_ack), got_NegRSDReply },
595162306a36Sopenharmony_ci	[P_OV_RESULT]	    = { sizeof(struct p_block_ack), got_OVResult },
595262306a36Sopenharmony_ci	[P_BARRIER_ACK]	    = { sizeof(struct p_barrier_ack), got_BarrierAck },
595362306a36Sopenharmony_ci	[P_STATE_CHG_REPLY] = { sizeof(struct p_req_state_reply), got_RqSReply },
595462306a36Sopenharmony_ci	[P_RS_IS_IN_SYNC]   = { sizeof(struct p_block_ack), got_IsInSync },
595562306a36Sopenharmony_ci	[P_DELAY_PROBE]     = { sizeof(struct p_delay_probe93), got_skip },
595662306a36Sopenharmony_ci	[P_RS_CANCEL]       = { sizeof(struct p_block_ack), got_NegRSDReply },
595762306a36Sopenharmony_ci	[P_CONN_ST_CHG_REPLY]={ sizeof(struct p_req_state_reply), got_conn_RqSReply },
595862306a36Sopenharmony_ci	[P_RETRY_WRITE]	    = { sizeof(struct p_block_ack), got_BlockAck },
595962306a36Sopenharmony_ci};
596062306a36Sopenharmony_ci
596162306a36Sopenharmony_ciint drbd_ack_receiver(struct drbd_thread *thi)
596262306a36Sopenharmony_ci{
596362306a36Sopenharmony_ci	struct drbd_connection *connection = thi->connection;
596462306a36Sopenharmony_ci	struct meta_sock_cmd *cmd = NULL;
596562306a36Sopenharmony_ci	struct packet_info pi;
596662306a36Sopenharmony_ci	unsigned long pre_recv_jif;
596762306a36Sopenharmony_ci	int rv;
596862306a36Sopenharmony_ci	void *buf    = connection->meta.rbuf;
596962306a36Sopenharmony_ci	int received = 0;
597062306a36Sopenharmony_ci	unsigned int header_size = drbd_header_size(connection);
597162306a36Sopenharmony_ci	int expect   = header_size;
597262306a36Sopenharmony_ci	bool ping_timeout_active = false;
597362306a36Sopenharmony_ci
597462306a36Sopenharmony_ci	sched_set_fifo_low(current);
597562306a36Sopenharmony_ci
597662306a36Sopenharmony_ci	while (get_t_state(thi) == RUNNING) {
597762306a36Sopenharmony_ci		drbd_thread_current_set_cpu(thi);
597862306a36Sopenharmony_ci
597962306a36Sopenharmony_ci		conn_reclaim_net_peer_reqs(connection);
598062306a36Sopenharmony_ci
598162306a36Sopenharmony_ci		if (test_and_clear_bit(SEND_PING, &connection->flags)) {
598262306a36Sopenharmony_ci			if (drbd_send_ping(connection)) {
598362306a36Sopenharmony_ci				drbd_err(connection, "drbd_send_ping has failed\n");
598462306a36Sopenharmony_ci				goto reconnect;
598562306a36Sopenharmony_ci			}
598662306a36Sopenharmony_ci			set_ping_timeout(connection);
598762306a36Sopenharmony_ci			ping_timeout_active = true;
598862306a36Sopenharmony_ci		}
598962306a36Sopenharmony_ci
599062306a36Sopenharmony_ci		pre_recv_jif = jiffies;
599162306a36Sopenharmony_ci		rv = drbd_recv_short(connection->meta.socket, buf, expect-received, 0);
599262306a36Sopenharmony_ci
599362306a36Sopenharmony_ci		/* Note:
599462306a36Sopenharmony_ci		 * -EINTR	 (on meta) we got a signal
599562306a36Sopenharmony_ci		 * -EAGAIN	 (on meta) rcvtimeo expired
599662306a36Sopenharmony_ci		 * -ECONNRESET	 other side closed the connection
599762306a36Sopenharmony_ci		 * -ERESTARTSYS  (on data) we got a signal
599862306a36Sopenharmony_ci		 * rv <  0	 other than above: unexpected error!
599962306a36Sopenharmony_ci		 * rv == expected: full header or command
600062306a36Sopenharmony_ci		 * rv <  expected: "woken" by signal during receive
600162306a36Sopenharmony_ci		 * rv == 0	 : "connection shut down by peer"
600262306a36Sopenharmony_ci		 */
600362306a36Sopenharmony_ci		if (likely(rv > 0)) {
600462306a36Sopenharmony_ci			received += rv;
600562306a36Sopenharmony_ci			buf	 += rv;
600662306a36Sopenharmony_ci		} else if (rv == 0) {
600762306a36Sopenharmony_ci			if (test_bit(DISCONNECT_SENT, &connection->flags)) {
600862306a36Sopenharmony_ci				long t;
600962306a36Sopenharmony_ci				rcu_read_lock();
601062306a36Sopenharmony_ci				t = rcu_dereference(connection->net_conf)->ping_timeo * HZ/10;
601162306a36Sopenharmony_ci				rcu_read_unlock();
601262306a36Sopenharmony_ci
601362306a36Sopenharmony_ci				t = wait_event_timeout(connection->ping_wait,
601462306a36Sopenharmony_ci						       connection->cstate < C_WF_REPORT_PARAMS,
601562306a36Sopenharmony_ci						       t);
601662306a36Sopenharmony_ci				if (t)
601762306a36Sopenharmony_ci					break;
601862306a36Sopenharmony_ci			}
601962306a36Sopenharmony_ci			drbd_err(connection, "meta connection shut down by peer.\n");
602062306a36Sopenharmony_ci			goto reconnect;
602162306a36Sopenharmony_ci		} else if (rv == -EAGAIN) {
602262306a36Sopenharmony_ci			/* If the data socket received something meanwhile,
602362306a36Sopenharmony_ci			 * that is good enough: peer is still alive. */
602462306a36Sopenharmony_ci			if (time_after(connection->last_received, pre_recv_jif))
602562306a36Sopenharmony_ci				continue;
602662306a36Sopenharmony_ci			if (ping_timeout_active) {
602762306a36Sopenharmony_ci				drbd_err(connection, "PingAck did not arrive in time.\n");
602862306a36Sopenharmony_ci				goto reconnect;
602962306a36Sopenharmony_ci			}
603062306a36Sopenharmony_ci			set_bit(SEND_PING, &connection->flags);
603162306a36Sopenharmony_ci			continue;
603262306a36Sopenharmony_ci		} else if (rv == -EINTR) {
603362306a36Sopenharmony_ci			/* maybe drbd_thread_stop(): the while condition will notice.
603462306a36Sopenharmony_ci			 * maybe woken for send_ping: we'll send a ping above,
603562306a36Sopenharmony_ci			 * and change the rcvtimeo */
603662306a36Sopenharmony_ci			flush_signals(current);
603762306a36Sopenharmony_ci			continue;
603862306a36Sopenharmony_ci		} else {
603962306a36Sopenharmony_ci			drbd_err(connection, "sock_recvmsg returned %d\n", rv);
604062306a36Sopenharmony_ci			goto reconnect;
604162306a36Sopenharmony_ci		}
604262306a36Sopenharmony_ci
604362306a36Sopenharmony_ci		if (received == expect && cmd == NULL) {
604462306a36Sopenharmony_ci			if (decode_header(connection, connection->meta.rbuf, &pi))
604562306a36Sopenharmony_ci				goto reconnect;
604662306a36Sopenharmony_ci			cmd = &ack_receiver_tbl[pi.cmd];
604762306a36Sopenharmony_ci			if (pi.cmd >= ARRAY_SIZE(ack_receiver_tbl) || !cmd->fn) {
604862306a36Sopenharmony_ci				drbd_err(connection, "Unexpected meta packet %s (0x%04x)\n",
604962306a36Sopenharmony_ci					 cmdname(pi.cmd), pi.cmd);
605062306a36Sopenharmony_ci				goto disconnect;
605162306a36Sopenharmony_ci			}
605262306a36Sopenharmony_ci			expect = header_size + cmd->pkt_size;
605362306a36Sopenharmony_ci			if (pi.size != expect - header_size) {
605462306a36Sopenharmony_ci				drbd_err(connection, "Wrong packet size on meta (c: %d, l: %d)\n",
605562306a36Sopenharmony_ci					pi.cmd, pi.size);
605662306a36Sopenharmony_ci				goto reconnect;
605762306a36Sopenharmony_ci			}
605862306a36Sopenharmony_ci		}
605962306a36Sopenharmony_ci		if (received == expect) {
606062306a36Sopenharmony_ci			bool err;
606162306a36Sopenharmony_ci
606262306a36Sopenharmony_ci			err = cmd->fn(connection, &pi);
606362306a36Sopenharmony_ci			if (err) {
606462306a36Sopenharmony_ci				drbd_err(connection, "%ps failed\n", cmd->fn);
606562306a36Sopenharmony_ci				goto reconnect;
606662306a36Sopenharmony_ci			}
606762306a36Sopenharmony_ci
606862306a36Sopenharmony_ci			connection->last_received = jiffies;
606962306a36Sopenharmony_ci
607062306a36Sopenharmony_ci			if (cmd == &ack_receiver_tbl[P_PING_ACK]) {
607162306a36Sopenharmony_ci				set_idle_timeout(connection);
607262306a36Sopenharmony_ci				ping_timeout_active = false;
607362306a36Sopenharmony_ci			}
607462306a36Sopenharmony_ci
607562306a36Sopenharmony_ci			buf	 = connection->meta.rbuf;
607662306a36Sopenharmony_ci			received = 0;
607762306a36Sopenharmony_ci			expect	 = header_size;
607862306a36Sopenharmony_ci			cmd	 = NULL;
607962306a36Sopenharmony_ci		}
608062306a36Sopenharmony_ci	}
608162306a36Sopenharmony_ci
608262306a36Sopenharmony_ci	if (0) {
608362306a36Sopenharmony_cireconnect:
608462306a36Sopenharmony_ci		conn_request_state(connection, NS(conn, C_NETWORK_FAILURE), CS_HARD);
608562306a36Sopenharmony_ci		conn_md_sync(connection);
608662306a36Sopenharmony_ci	}
608762306a36Sopenharmony_ci	if (0) {
608862306a36Sopenharmony_cidisconnect:
608962306a36Sopenharmony_ci		conn_request_state(connection, NS(conn, C_DISCONNECTING), CS_HARD);
609062306a36Sopenharmony_ci	}
609162306a36Sopenharmony_ci
609262306a36Sopenharmony_ci	drbd_info(connection, "ack_receiver terminated\n");
609362306a36Sopenharmony_ci
609462306a36Sopenharmony_ci	return 0;
609562306a36Sopenharmony_ci}
609662306a36Sopenharmony_ci
609762306a36Sopenharmony_civoid drbd_send_acks_wf(struct work_struct *ws)
609862306a36Sopenharmony_ci{
609962306a36Sopenharmony_ci	struct drbd_peer_device *peer_device =
610062306a36Sopenharmony_ci		container_of(ws, struct drbd_peer_device, send_acks_work);
610162306a36Sopenharmony_ci	struct drbd_connection *connection = peer_device->connection;
610262306a36Sopenharmony_ci	struct drbd_device *device = peer_device->device;
610362306a36Sopenharmony_ci	struct net_conf *nc;
610462306a36Sopenharmony_ci	int tcp_cork, err;
610562306a36Sopenharmony_ci
610662306a36Sopenharmony_ci	rcu_read_lock();
610762306a36Sopenharmony_ci	nc = rcu_dereference(connection->net_conf);
610862306a36Sopenharmony_ci	tcp_cork = nc->tcp_cork;
610962306a36Sopenharmony_ci	rcu_read_unlock();
611062306a36Sopenharmony_ci
611162306a36Sopenharmony_ci	if (tcp_cork)
611262306a36Sopenharmony_ci		tcp_sock_set_cork(connection->meta.socket->sk, true);
611362306a36Sopenharmony_ci
611462306a36Sopenharmony_ci	err = drbd_finish_peer_reqs(device);
611562306a36Sopenharmony_ci	kref_put(&device->kref, drbd_destroy_device);
611662306a36Sopenharmony_ci	/* get is in drbd_endio_write_sec_final(). That is necessary to keep the
611762306a36Sopenharmony_ci	   struct work_struct send_acks_work alive, which is in the peer_device object */
611862306a36Sopenharmony_ci
611962306a36Sopenharmony_ci	if (err) {
612062306a36Sopenharmony_ci		conn_request_state(connection, NS(conn, C_NETWORK_FAILURE), CS_HARD);
612162306a36Sopenharmony_ci		return;
612262306a36Sopenharmony_ci	}
612362306a36Sopenharmony_ci
612462306a36Sopenharmony_ci	if (tcp_cork)
612562306a36Sopenharmony_ci		tcp_sock_set_cork(connection->meta.socket->sk, false);
612662306a36Sopenharmony_ci
612762306a36Sopenharmony_ci	return;
612862306a36Sopenharmony_ci}
6129