// SPDX-License-Identifier: GPL-2.0-or-later
/* Handle vlserver selection and rotation.
 *
 * Copyright (C) 2018 Red Hat, Inc. All Rights Reserved.
 * Written by David Howells (dhowells@redhat.com)
 */
762306a36Sopenharmony_ci
862306a36Sopenharmony_ci#include <linux/kernel.h>
962306a36Sopenharmony_ci#include <linux/sched.h>
1062306a36Sopenharmony_ci#include <linux/sched/signal.h>
1162306a36Sopenharmony_ci#include "internal.h"
1262306a36Sopenharmony_ci#include "afs_vl.h"
1362306a36Sopenharmony_ci
1462306a36Sopenharmony_ci/*
1562306a36Sopenharmony_ci * Begin an operation on a volume location server.
1662306a36Sopenharmony_ci */
1762306a36Sopenharmony_cibool afs_begin_vlserver_operation(struct afs_vl_cursor *vc, struct afs_cell *cell,
1862306a36Sopenharmony_ci				  struct key *key)
1962306a36Sopenharmony_ci{
2062306a36Sopenharmony_ci	memset(vc, 0, sizeof(*vc));
2162306a36Sopenharmony_ci	vc->cell = cell;
2262306a36Sopenharmony_ci	vc->key = key;
2362306a36Sopenharmony_ci	vc->error = -EDESTADDRREQ;
2462306a36Sopenharmony_ci	vc->ac.error = SHRT_MAX;
2562306a36Sopenharmony_ci
2662306a36Sopenharmony_ci	if (signal_pending(current)) {
2762306a36Sopenharmony_ci		vc->error = -EINTR;
2862306a36Sopenharmony_ci		vc->flags |= AFS_VL_CURSOR_STOP;
2962306a36Sopenharmony_ci		return false;
3062306a36Sopenharmony_ci	}
3162306a36Sopenharmony_ci
3262306a36Sopenharmony_ci	return true;
3362306a36Sopenharmony_ci}
3462306a36Sopenharmony_ci
3562306a36Sopenharmony_ci/*
3662306a36Sopenharmony_ci * Begin iteration through a server list, starting with the last used server if
3762306a36Sopenharmony_ci * possible, or the last recorded good server if not.
3862306a36Sopenharmony_ci */
3962306a36Sopenharmony_cistatic bool afs_start_vl_iteration(struct afs_vl_cursor *vc)
4062306a36Sopenharmony_ci{
4162306a36Sopenharmony_ci	struct afs_cell *cell = vc->cell;
4262306a36Sopenharmony_ci	unsigned int dns_lookup_count;
4362306a36Sopenharmony_ci
4462306a36Sopenharmony_ci	if (cell->dns_source == DNS_RECORD_UNAVAILABLE ||
4562306a36Sopenharmony_ci	    cell->dns_expiry <= ktime_get_real_seconds()) {
4662306a36Sopenharmony_ci		dns_lookup_count = smp_load_acquire(&cell->dns_lookup_count);
4762306a36Sopenharmony_ci		set_bit(AFS_CELL_FL_DO_LOOKUP, &cell->flags);
4862306a36Sopenharmony_ci		afs_queue_cell(cell, afs_cell_trace_get_queue_dns);
4962306a36Sopenharmony_ci
5062306a36Sopenharmony_ci		if (cell->dns_source == DNS_RECORD_UNAVAILABLE) {
5162306a36Sopenharmony_ci			if (wait_var_event_interruptible(
5262306a36Sopenharmony_ci				    &cell->dns_lookup_count,
5362306a36Sopenharmony_ci				    smp_load_acquire(&cell->dns_lookup_count)
5462306a36Sopenharmony_ci				    != dns_lookup_count) < 0) {
5562306a36Sopenharmony_ci				vc->error = -ERESTARTSYS;
5662306a36Sopenharmony_ci				return false;
5762306a36Sopenharmony_ci			}
5862306a36Sopenharmony_ci		}
5962306a36Sopenharmony_ci
6062306a36Sopenharmony_ci		/* Status load is ordered after lookup counter load */
6162306a36Sopenharmony_ci		if (cell->dns_status == DNS_LOOKUP_GOT_NOT_FOUND) {
6262306a36Sopenharmony_ci			pr_warn("No record of cell %s\n", cell->name);
6362306a36Sopenharmony_ci			vc->error = -ENOENT;
6462306a36Sopenharmony_ci			return false;
6562306a36Sopenharmony_ci		}
6662306a36Sopenharmony_ci
6762306a36Sopenharmony_ci		if (cell->dns_source == DNS_RECORD_UNAVAILABLE) {
6862306a36Sopenharmony_ci			vc->error = -EDESTADDRREQ;
6962306a36Sopenharmony_ci			return false;
7062306a36Sopenharmony_ci		}
7162306a36Sopenharmony_ci	}
7262306a36Sopenharmony_ci
7362306a36Sopenharmony_ci	read_lock(&cell->vl_servers_lock);
7462306a36Sopenharmony_ci	vc->server_list = afs_get_vlserverlist(
7562306a36Sopenharmony_ci		rcu_dereference_protected(cell->vl_servers,
7662306a36Sopenharmony_ci					  lockdep_is_held(&cell->vl_servers_lock)));
7762306a36Sopenharmony_ci	read_unlock(&cell->vl_servers_lock);
7862306a36Sopenharmony_ci	if (!vc->server_list->nr_servers)
7962306a36Sopenharmony_ci		return false;
8062306a36Sopenharmony_ci
8162306a36Sopenharmony_ci	vc->untried = (1UL << vc->server_list->nr_servers) - 1;
8262306a36Sopenharmony_ci	vc->index = -1;
8362306a36Sopenharmony_ci	return true;
8462306a36Sopenharmony_ci}
8562306a36Sopenharmony_ci
8662306a36Sopenharmony_ci/*
8762306a36Sopenharmony_ci * Select the vlserver to use.  May be called multiple times to rotate
8862306a36Sopenharmony_ci * through the vlservers.
8962306a36Sopenharmony_ci */
9062306a36Sopenharmony_cibool afs_select_vlserver(struct afs_vl_cursor *vc)
9162306a36Sopenharmony_ci{
9262306a36Sopenharmony_ci	struct afs_addr_list *alist;
9362306a36Sopenharmony_ci	struct afs_vlserver *vlserver;
9462306a36Sopenharmony_ci	struct afs_error e;
9562306a36Sopenharmony_ci	u32 rtt;
9662306a36Sopenharmony_ci	int error = vc->ac.error, i;
9762306a36Sopenharmony_ci
9862306a36Sopenharmony_ci	_enter("%lx[%d],%lx[%d],%d,%d",
9962306a36Sopenharmony_ci	       vc->untried, vc->index,
10062306a36Sopenharmony_ci	       vc->ac.tried, vc->ac.index,
10162306a36Sopenharmony_ci	       error, vc->ac.abort_code);
10262306a36Sopenharmony_ci
10362306a36Sopenharmony_ci	if (vc->flags & AFS_VL_CURSOR_STOP) {
10462306a36Sopenharmony_ci		_leave(" = f [stopped]");
10562306a36Sopenharmony_ci		return false;
10662306a36Sopenharmony_ci	}
10762306a36Sopenharmony_ci
10862306a36Sopenharmony_ci	vc->nr_iterations++;
10962306a36Sopenharmony_ci
11062306a36Sopenharmony_ci	/* Evaluate the result of the previous operation, if there was one. */
11162306a36Sopenharmony_ci	switch (error) {
11262306a36Sopenharmony_ci	case SHRT_MAX:
11362306a36Sopenharmony_ci		goto start;
11462306a36Sopenharmony_ci
11562306a36Sopenharmony_ci	default:
11662306a36Sopenharmony_ci	case 0:
11762306a36Sopenharmony_ci		/* Success or local failure.  Stop. */
11862306a36Sopenharmony_ci		vc->error = error;
11962306a36Sopenharmony_ci		vc->flags |= AFS_VL_CURSOR_STOP;
12062306a36Sopenharmony_ci		_leave(" = f [okay/local %d]", vc->ac.error);
12162306a36Sopenharmony_ci		return false;
12262306a36Sopenharmony_ci
12362306a36Sopenharmony_ci	case -ECONNABORTED:
12462306a36Sopenharmony_ci		/* The far side rejected the operation on some grounds.  This
12562306a36Sopenharmony_ci		 * might involve the server being busy or the volume having been moved.
12662306a36Sopenharmony_ci		 */
12762306a36Sopenharmony_ci		switch (vc->ac.abort_code) {
12862306a36Sopenharmony_ci		case AFSVL_IO:
12962306a36Sopenharmony_ci		case AFSVL_BADVOLOPER:
13062306a36Sopenharmony_ci		case AFSVL_NOMEM:
13162306a36Sopenharmony_ci			/* The server went weird. */
13262306a36Sopenharmony_ci			vc->error = -EREMOTEIO;
13362306a36Sopenharmony_ci			//write_lock(&vc->cell->vl_servers_lock);
13462306a36Sopenharmony_ci			//vc->server_list->weird_mask |= 1 << vc->index;
13562306a36Sopenharmony_ci			//write_unlock(&vc->cell->vl_servers_lock);
13662306a36Sopenharmony_ci			goto next_server;
13762306a36Sopenharmony_ci
13862306a36Sopenharmony_ci		default:
13962306a36Sopenharmony_ci			vc->error = afs_abort_to_error(vc->ac.abort_code);
14062306a36Sopenharmony_ci			goto failed;
14162306a36Sopenharmony_ci		}
14262306a36Sopenharmony_ci
14362306a36Sopenharmony_ci	case -ERFKILL:
14462306a36Sopenharmony_ci	case -EADDRNOTAVAIL:
14562306a36Sopenharmony_ci	case -ENETUNREACH:
14662306a36Sopenharmony_ci	case -EHOSTUNREACH:
14762306a36Sopenharmony_ci	case -EHOSTDOWN:
14862306a36Sopenharmony_ci	case -ECONNREFUSED:
14962306a36Sopenharmony_ci	case -ETIMEDOUT:
15062306a36Sopenharmony_ci	case -ETIME:
15162306a36Sopenharmony_ci		_debug("no conn %d", error);
15262306a36Sopenharmony_ci		vc->error = error;
15362306a36Sopenharmony_ci		goto iterate_address;
15462306a36Sopenharmony_ci
15562306a36Sopenharmony_ci	case -ECONNRESET:
15662306a36Sopenharmony_ci		_debug("call reset");
15762306a36Sopenharmony_ci		vc->error = error;
15862306a36Sopenharmony_ci		vc->flags |= AFS_VL_CURSOR_RETRY;
15962306a36Sopenharmony_ci		goto next_server;
16062306a36Sopenharmony_ci
16162306a36Sopenharmony_ci	case -EOPNOTSUPP:
16262306a36Sopenharmony_ci		_debug("notsupp");
16362306a36Sopenharmony_ci		goto next_server;
16462306a36Sopenharmony_ci	}
16562306a36Sopenharmony_ci
16662306a36Sopenharmony_cirestart_from_beginning:
16762306a36Sopenharmony_ci	_debug("restart");
16862306a36Sopenharmony_ci	afs_end_cursor(&vc->ac);
16962306a36Sopenharmony_ci	afs_put_vlserverlist(vc->cell->net, vc->server_list);
17062306a36Sopenharmony_ci	vc->server_list = NULL;
17162306a36Sopenharmony_ci	if (vc->flags & AFS_VL_CURSOR_RETRIED)
17262306a36Sopenharmony_ci		goto failed;
17362306a36Sopenharmony_ci	vc->flags |= AFS_VL_CURSOR_RETRIED;
17462306a36Sopenharmony_cistart:
17562306a36Sopenharmony_ci	_debug("start");
17662306a36Sopenharmony_ci
17762306a36Sopenharmony_ci	if (!afs_start_vl_iteration(vc))
17862306a36Sopenharmony_ci		goto failed;
17962306a36Sopenharmony_ci
18062306a36Sopenharmony_ci	error = afs_send_vl_probes(vc->cell->net, vc->key, vc->server_list);
18162306a36Sopenharmony_ci	if (error < 0)
18262306a36Sopenharmony_ci		goto failed_set_error;
18362306a36Sopenharmony_ci
18462306a36Sopenharmony_cipick_server:
18562306a36Sopenharmony_ci	_debug("pick [%lx]", vc->untried);
18662306a36Sopenharmony_ci
18762306a36Sopenharmony_ci	error = afs_wait_for_vl_probes(vc->server_list, vc->untried);
18862306a36Sopenharmony_ci	if (error < 0)
18962306a36Sopenharmony_ci		goto failed_set_error;
19062306a36Sopenharmony_ci
19162306a36Sopenharmony_ci	/* Pick the untried server with the lowest RTT. */
19262306a36Sopenharmony_ci	vc->index = vc->server_list->preferred;
19362306a36Sopenharmony_ci	if (test_bit(vc->index, &vc->untried))
19462306a36Sopenharmony_ci		goto selected_server;
19562306a36Sopenharmony_ci
19662306a36Sopenharmony_ci	vc->index = -1;
19762306a36Sopenharmony_ci	rtt = U32_MAX;
19862306a36Sopenharmony_ci	for (i = 0; i < vc->server_list->nr_servers; i++) {
19962306a36Sopenharmony_ci		struct afs_vlserver *s = vc->server_list->servers[i].server;
20062306a36Sopenharmony_ci
20162306a36Sopenharmony_ci		if (!test_bit(i, &vc->untried) ||
20262306a36Sopenharmony_ci		    !test_bit(AFS_VLSERVER_FL_RESPONDING, &s->flags))
20362306a36Sopenharmony_ci			continue;
20462306a36Sopenharmony_ci		if (s->probe.rtt < rtt) {
20562306a36Sopenharmony_ci			vc->index = i;
20662306a36Sopenharmony_ci			rtt = s->probe.rtt;
20762306a36Sopenharmony_ci		}
20862306a36Sopenharmony_ci	}
20962306a36Sopenharmony_ci
21062306a36Sopenharmony_ci	if (vc->index == -1)
21162306a36Sopenharmony_ci		goto no_more_servers;
21262306a36Sopenharmony_ci
21362306a36Sopenharmony_ciselected_server:
21462306a36Sopenharmony_ci	_debug("use %d", vc->index);
21562306a36Sopenharmony_ci	__clear_bit(vc->index, &vc->untried);
21662306a36Sopenharmony_ci
21762306a36Sopenharmony_ci	/* We're starting on a different vlserver from the list.  We need to
21862306a36Sopenharmony_ci	 * check it, find its address list and probe its capabilities before we
21962306a36Sopenharmony_ci	 * use it.
22062306a36Sopenharmony_ci	 */
22162306a36Sopenharmony_ci	ASSERTCMP(vc->ac.alist, ==, NULL);
22262306a36Sopenharmony_ci	vlserver = vc->server_list->servers[vc->index].server;
22362306a36Sopenharmony_ci	vc->server = vlserver;
22462306a36Sopenharmony_ci
22562306a36Sopenharmony_ci	_debug("USING VLSERVER: %s", vlserver->name);
22662306a36Sopenharmony_ci
22762306a36Sopenharmony_ci	read_lock(&vlserver->lock);
22862306a36Sopenharmony_ci	alist = rcu_dereference_protected(vlserver->addresses,
22962306a36Sopenharmony_ci					  lockdep_is_held(&vlserver->lock));
23062306a36Sopenharmony_ci	afs_get_addrlist(alist);
23162306a36Sopenharmony_ci	read_unlock(&vlserver->lock);
23262306a36Sopenharmony_ci
23362306a36Sopenharmony_ci	memset(&vc->ac, 0, sizeof(vc->ac));
23462306a36Sopenharmony_ci
23562306a36Sopenharmony_ci	if (!vc->ac.alist)
23662306a36Sopenharmony_ci		vc->ac.alist = alist;
23762306a36Sopenharmony_ci	else
23862306a36Sopenharmony_ci		afs_put_addrlist(alist);
23962306a36Sopenharmony_ci
24062306a36Sopenharmony_ci	vc->ac.index = -1;
24162306a36Sopenharmony_ci
24262306a36Sopenharmony_ciiterate_address:
24362306a36Sopenharmony_ci	ASSERT(vc->ac.alist);
24462306a36Sopenharmony_ci	/* Iterate over the current server's address list to try and find an
24562306a36Sopenharmony_ci	 * address on which it will respond to us.
24662306a36Sopenharmony_ci	 */
24762306a36Sopenharmony_ci	if (!afs_iterate_addresses(&vc->ac))
24862306a36Sopenharmony_ci		goto next_server;
24962306a36Sopenharmony_ci
25062306a36Sopenharmony_ci	_debug("VL address %d/%d", vc->ac.index, vc->ac.alist->nr_addrs);
25162306a36Sopenharmony_ci
25262306a36Sopenharmony_ci	_leave(" = t %pISpc", &vc->ac.alist->addrs[vc->ac.index].transport);
25362306a36Sopenharmony_ci	return true;
25462306a36Sopenharmony_ci
25562306a36Sopenharmony_cinext_server:
25662306a36Sopenharmony_ci	_debug("next");
25762306a36Sopenharmony_ci	afs_end_cursor(&vc->ac);
25862306a36Sopenharmony_ci	goto pick_server;
25962306a36Sopenharmony_ci
26062306a36Sopenharmony_cino_more_servers:
26162306a36Sopenharmony_ci	/* That's all the servers poked to no good effect.  Try again if some
26262306a36Sopenharmony_ci	 * of them were busy.
26362306a36Sopenharmony_ci	 */
26462306a36Sopenharmony_ci	if (vc->flags & AFS_VL_CURSOR_RETRY)
26562306a36Sopenharmony_ci		goto restart_from_beginning;
26662306a36Sopenharmony_ci
26762306a36Sopenharmony_ci	e.error = -EDESTADDRREQ;
26862306a36Sopenharmony_ci	e.responded = false;
26962306a36Sopenharmony_ci	for (i = 0; i < vc->server_list->nr_servers; i++) {
27062306a36Sopenharmony_ci		struct afs_vlserver *s = vc->server_list->servers[i].server;
27162306a36Sopenharmony_ci
27262306a36Sopenharmony_ci		if (test_bit(AFS_VLSERVER_FL_RESPONDING, &s->flags))
27362306a36Sopenharmony_ci			e.responded = true;
27462306a36Sopenharmony_ci		afs_prioritise_error(&e, READ_ONCE(s->probe.error),
27562306a36Sopenharmony_ci				     s->probe.abort_code);
27662306a36Sopenharmony_ci	}
27762306a36Sopenharmony_ci
27862306a36Sopenharmony_ci	error = e.error;
27962306a36Sopenharmony_ci
28062306a36Sopenharmony_cifailed_set_error:
28162306a36Sopenharmony_ci	vc->error = error;
28262306a36Sopenharmony_cifailed:
28362306a36Sopenharmony_ci	vc->flags |= AFS_VL_CURSOR_STOP;
28462306a36Sopenharmony_ci	afs_end_cursor(&vc->ac);
28562306a36Sopenharmony_ci	_leave(" = f [failed %d]", vc->error);
28662306a36Sopenharmony_ci	return false;
28762306a36Sopenharmony_ci}
28862306a36Sopenharmony_ci
28962306a36Sopenharmony_ci/*
29062306a36Sopenharmony_ci * Dump cursor state in the case of the error being EDESTADDRREQ.
29162306a36Sopenharmony_ci */
29262306a36Sopenharmony_cistatic void afs_vl_dump_edestaddrreq(const struct afs_vl_cursor *vc)
29362306a36Sopenharmony_ci{
29462306a36Sopenharmony_ci	struct afs_cell *cell = vc->cell;
29562306a36Sopenharmony_ci	static int count;
29662306a36Sopenharmony_ci	int i;
29762306a36Sopenharmony_ci
29862306a36Sopenharmony_ci	if (!IS_ENABLED(CONFIG_AFS_DEBUG_CURSOR) || count > 3)
29962306a36Sopenharmony_ci		return;
30062306a36Sopenharmony_ci	count++;
30162306a36Sopenharmony_ci
30262306a36Sopenharmony_ci	rcu_read_lock();
30362306a36Sopenharmony_ci	pr_notice("EDESTADDR occurred\n");
30462306a36Sopenharmony_ci	pr_notice("CELL: %s err=%d\n", cell->name, cell->error);
30562306a36Sopenharmony_ci	pr_notice("DNS: src=%u st=%u lc=%x\n",
30662306a36Sopenharmony_ci		  cell->dns_source, cell->dns_status, cell->dns_lookup_count);
30762306a36Sopenharmony_ci	pr_notice("VC: ut=%lx ix=%u ni=%hu fl=%hx err=%hd\n",
30862306a36Sopenharmony_ci		  vc->untried, vc->index, vc->nr_iterations, vc->flags, vc->error);
30962306a36Sopenharmony_ci
31062306a36Sopenharmony_ci	if (vc->server_list) {
31162306a36Sopenharmony_ci		const struct afs_vlserver_list *sl = vc->server_list;
31262306a36Sopenharmony_ci		pr_notice("VC: SL nr=%u ix=%u\n",
31362306a36Sopenharmony_ci			  sl->nr_servers, sl->index);
31462306a36Sopenharmony_ci		for (i = 0; i < sl->nr_servers; i++) {
31562306a36Sopenharmony_ci			const struct afs_vlserver *s = sl->servers[i].server;
31662306a36Sopenharmony_ci			pr_notice("VC: server %s+%hu fl=%lx E=%hd\n",
31762306a36Sopenharmony_ci				  s->name, s->port, s->flags, s->probe.error);
31862306a36Sopenharmony_ci			if (s->addresses) {
31962306a36Sopenharmony_ci				const struct afs_addr_list *a =
32062306a36Sopenharmony_ci					rcu_dereference(s->addresses);
32162306a36Sopenharmony_ci				pr_notice("VC:  - nr=%u/%u/%u pf=%u\n",
32262306a36Sopenharmony_ci					  a->nr_ipv4, a->nr_addrs, a->max_addrs,
32362306a36Sopenharmony_ci					  a->preferred);
32462306a36Sopenharmony_ci				pr_notice("VC:  - R=%lx F=%lx\n",
32562306a36Sopenharmony_ci					  a->responded, a->failed);
32662306a36Sopenharmony_ci				if (a == vc->ac.alist)
32762306a36Sopenharmony_ci					pr_notice("VC:  - current\n");
32862306a36Sopenharmony_ci			}
32962306a36Sopenharmony_ci		}
33062306a36Sopenharmony_ci	}
33162306a36Sopenharmony_ci
33262306a36Sopenharmony_ci	pr_notice("AC: t=%lx ax=%u ac=%d er=%d r=%u ni=%u\n",
33362306a36Sopenharmony_ci		  vc->ac.tried, vc->ac.index, vc->ac.abort_code, vc->ac.error,
33462306a36Sopenharmony_ci		  vc->ac.responded, vc->ac.nr_iterations);
33562306a36Sopenharmony_ci	rcu_read_unlock();
33662306a36Sopenharmony_ci}
33762306a36Sopenharmony_ci
33862306a36Sopenharmony_ci/*
33962306a36Sopenharmony_ci * Tidy up a volume location server cursor and unlock the vnode.
34062306a36Sopenharmony_ci */
34162306a36Sopenharmony_ciint afs_end_vlserver_operation(struct afs_vl_cursor *vc)
34262306a36Sopenharmony_ci{
34362306a36Sopenharmony_ci	struct afs_net *net = vc->cell->net;
34462306a36Sopenharmony_ci
34562306a36Sopenharmony_ci	if (vc->error == -EDESTADDRREQ ||
34662306a36Sopenharmony_ci	    vc->error == -EADDRNOTAVAIL ||
34762306a36Sopenharmony_ci	    vc->error == -ENETUNREACH ||
34862306a36Sopenharmony_ci	    vc->error == -EHOSTUNREACH)
34962306a36Sopenharmony_ci		afs_vl_dump_edestaddrreq(vc);
35062306a36Sopenharmony_ci
35162306a36Sopenharmony_ci	afs_end_cursor(&vc->ac);
35262306a36Sopenharmony_ci	afs_put_vlserverlist(net, vc->server_list);
35362306a36Sopenharmony_ci
35462306a36Sopenharmony_ci	if (vc->error == -ECONNABORTED)
35562306a36Sopenharmony_ci		vc->error = afs_abort_to_error(vc->ac.abort_code);
35662306a36Sopenharmony_ci
35762306a36Sopenharmony_ci	return vc->error;
35862306a36Sopenharmony_ci}
359