xref: /kernel/linux/linux-5.10/fs/afs/vl_rotate.c (revision 8c2ecf20)
1// SPDX-License-Identifier: GPL-2.0-or-later
2/* Handle vlserver selection and rotation.
3 *
4 * Copyright (C) 2018 Red Hat, Inc. All Rights Reserved.
5 * Written by David Howells (dhowells@redhat.com)
6 */
7
8#include <linux/kernel.h>
9#include <linux/sched.h>
10#include <linux/sched/signal.h>
11#include "internal.h"
12#include "afs_vl.h"
13
14/*
15 * Begin an operation on a volume location server.
16 */
17bool afs_begin_vlserver_operation(struct afs_vl_cursor *vc, struct afs_cell *cell,
18				  struct key *key)
19{
20	memset(vc, 0, sizeof(*vc));
21	vc->cell = cell;
22	vc->key = key;
23	vc->error = -EDESTADDRREQ;
24	vc->ac.error = SHRT_MAX;
25
26	if (signal_pending(current)) {
27		vc->error = -EINTR;
28		vc->flags |= AFS_VL_CURSOR_STOP;
29		return false;
30	}
31
32	return true;
33}
34
35/*
36 * Begin iteration through a server list, starting with the last used server if
37 * possible, or the last recorded good server if not.
38 */
39static bool afs_start_vl_iteration(struct afs_vl_cursor *vc)
40{
41	struct afs_cell *cell = vc->cell;
42	unsigned int dns_lookup_count;
43
44	if (cell->dns_source == DNS_RECORD_UNAVAILABLE ||
45	    cell->dns_expiry <= ktime_get_real_seconds()) {
46		dns_lookup_count = smp_load_acquire(&cell->dns_lookup_count);
47		set_bit(AFS_CELL_FL_DO_LOOKUP, &cell->flags);
48		afs_queue_cell(cell, afs_cell_trace_get_queue_dns);
49
50		if (cell->dns_source == DNS_RECORD_UNAVAILABLE) {
51			if (wait_var_event_interruptible(
52				    &cell->dns_lookup_count,
53				    smp_load_acquire(&cell->dns_lookup_count)
54				    != dns_lookup_count) < 0) {
55				vc->error = -ERESTARTSYS;
56				return false;
57			}
58		}
59
60		/* Status load is ordered after lookup counter load */
61		if (cell->dns_status == DNS_LOOKUP_GOT_NOT_FOUND) {
62			pr_warn("No record of cell %s\n", cell->name);
63			vc->error = -ENOENT;
64			return false;
65		}
66
67		if (cell->dns_source == DNS_RECORD_UNAVAILABLE) {
68			vc->error = -EDESTADDRREQ;
69			return false;
70		}
71	}
72
73	read_lock(&cell->vl_servers_lock);
74	vc->server_list = afs_get_vlserverlist(
75		rcu_dereference_protected(cell->vl_servers,
76					  lockdep_is_held(&cell->vl_servers_lock)));
77	read_unlock(&cell->vl_servers_lock);
78	if (!vc->server_list->nr_servers)
79		return false;
80
81	vc->untried = (1UL << vc->server_list->nr_servers) - 1;
82	vc->index = -1;
83	return true;
84}
85
86/*
87 * Select the vlserver to use.  May be called multiple times to rotate
88 * through the vlservers.
89 */
90bool afs_select_vlserver(struct afs_vl_cursor *vc)
91{
92	struct afs_addr_list *alist;
93	struct afs_vlserver *vlserver;
94	struct afs_error e;
95	u32 rtt;
96	int error = vc->ac.error, i;
97
98	_enter("%lx[%d],%lx[%d],%d,%d",
99	       vc->untried, vc->index,
100	       vc->ac.tried, vc->ac.index,
101	       error, vc->ac.abort_code);
102
103	if (vc->flags & AFS_VL_CURSOR_STOP) {
104		_leave(" = f [stopped]");
105		return false;
106	}
107
108	vc->nr_iterations++;
109
110	/* Evaluate the result of the previous operation, if there was one. */
111	switch (error) {
112	case SHRT_MAX:
113		goto start;
114
115	default:
116	case 0:
117		/* Success or local failure.  Stop. */
118		vc->error = error;
119		vc->flags |= AFS_VL_CURSOR_STOP;
120		_leave(" = f [okay/local %d]", vc->ac.error);
121		return false;
122
123	case -ECONNABORTED:
124		/* The far side rejected the operation on some grounds.  This
125		 * might involve the server being busy or the volume having been moved.
126		 */
127		switch (vc->ac.abort_code) {
128		case AFSVL_IO:
129		case AFSVL_BADVOLOPER:
130		case AFSVL_NOMEM:
131			/* The server went weird. */
132			vc->error = -EREMOTEIO;
133			//write_lock(&vc->cell->vl_servers_lock);
134			//vc->server_list->weird_mask |= 1 << vc->index;
135			//write_unlock(&vc->cell->vl_servers_lock);
136			goto next_server;
137
138		default:
139			vc->error = afs_abort_to_error(vc->ac.abort_code);
140			goto failed;
141		}
142
143	case -ERFKILL:
144	case -EADDRNOTAVAIL:
145	case -ENETUNREACH:
146	case -EHOSTUNREACH:
147	case -EHOSTDOWN:
148	case -ECONNREFUSED:
149	case -ETIMEDOUT:
150	case -ETIME:
151		_debug("no conn %d", error);
152		vc->error = error;
153		goto iterate_address;
154
155	case -ECONNRESET:
156		_debug("call reset");
157		vc->error = error;
158		vc->flags |= AFS_VL_CURSOR_RETRY;
159		goto next_server;
160
161	case -EOPNOTSUPP:
162		_debug("notsupp");
163		goto next_server;
164	}
165
166restart_from_beginning:
167	_debug("restart");
168	afs_end_cursor(&vc->ac);
169	afs_put_vlserverlist(vc->cell->net, vc->server_list);
170	vc->server_list = NULL;
171	if (vc->flags & AFS_VL_CURSOR_RETRIED)
172		goto failed;
173	vc->flags |= AFS_VL_CURSOR_RETRIED;
174start:
175	_debug("start");
176
177	if (!afs_start_vl_iteration(vc))
178		goto failed;
179
180	error = afs_send_vl_probes(vc->cell->net, vc->key, vc->server_list);
181	if (error < 0)
182		goto failed_set_error;
183
184pick_server:
185	_debug("pick [%lx]", vc->untried);
186
187	error = afs_wait_for_vl_probes(vc->server_list, vc->untried);
188	if (error < 0)
189		goto failed_set_error;
190
191	/* Pick the untried server with the lowest RTT. */
192	vc->index = vc->server_list->preferred;
193	if (test_bit(vc->index, &vc->untried))
194		goto selected_server;
195
196	vc->index = -1;
197	rtt = U32_MAX;
198	for (i = 0; i < vc->server_list->nr_servers; i++) {
199		struct afs_vlserver *s = vc->server_list->servers[i].server;
200
201		if (!test_bit(i, &vc->untried) ||
202		    !test_bit(AFS_VLSERVER_FL_RESPONDING, &s->flags))
203			continue;
204		if (s->probe.rtt < rtt) {
205			vc->index = i;
206			rtt = s->probe.rtt;
207		}
208	}
209
210	if (vc->index == -1)
211		goto no_more_servers;
212
213selected_server:
214	_debug("use %d", vc->index);
215	__clear_bit(vc->index, &vc->untried);
216
217	/* We're starting on a different vlserver from the list.  We need to
218	 * check it, find its address list and probe its capabilities before we
219	 * use it.
220	 */
221	ASSERTCMP(vc->ac.alist, ==, NULL);
222	vlserver = vc->server_list->servers[vc->index].server;
223	vc->server = vlserver;
224
225	_debug("USING VLSERVER: %s", vlserver->name);
226
227	read_lock(&vlserver->lock);
228	alist = rcu_dereference_protected(vlserver->addresses,
229					  lockdep_is_held(&vlserver->lock));
230	afs_get_addrlist(alist);
231	read_unlock(&vlserver->lock);
232
233	memset(&vc->ac, 0, sizeof(vc->ac));
234
235	if (!vc->ac.alist)
236		vc->ac.alist = alist;
237	else
238		afs_put_addrlist(alist);
239
240	vc->ac.index = -1;
241
242iterate_address:
243	ASSERT(vc->ac.alist);
244	/* Iterate over the current server's address list to try and find an
245	 * address on which it will respond to us.
246	 */
247	if (!afs_iterate_addresses(&vc->ac))
248		goto next_server;
249
250	_debug("VL address %d/%d", vc->ac.index, vc->ac.alist->nr_addrs);
251
252	_leave(" = t %pISpc", &vc->ac.alist->addrs[vc->ac.index].transport);
253	return true;
254
255next_server:
256	_debug("next");
257	afs_end_cursor(&vc->ac);
258	goto pick_server;
259
260no_more_servers:
261	/* That's all the servers poked to no good effect.  Try again if some
262	 * of them were busy.
263	 */
264	if (vc->flags & AFS_VL_CURSOR_RETRY)
265		goto restart_from_beginning;
266
267	e.error = -EDESTADDRREQ;
268	e.responded = false;
269	for (i = 0; i < vc->server_list->nr_servers; i++) {
270		struct afs_vlserver *s = vc->server_list->servers[i].server;
271
272		if (test_bit(AFS_VLSERVER_FL_RESPONDING, &s->flags))
273			e.responded = true;
274		afs_prioritise_error(&e, READ_ONCE(s->probe.error),
275				     s->probe.abort_code);
276	}
277
278	error = e.error;
279
280failed_set_error:
281	vc->error = error;
282failed:
283	vc->flags |= AFS_VL_CURSOR_STOP;
284	afs_end_cursor(&vc->ac);
285	_leave(" = f [failed %d]", vc->error);
286	return false;
287}
288
289/*
290 * Dump cursor state in the case of the error being EDESTADDRREQ.
291 */
292static void afs_vl_dump_edestaddrreq(const struct afs_vl_cursor *vc)
293{
294	struct afs_cell *cell = vc->cell;
295	static int count;
296	int i;
297
298	if (!IS_ENABLED(CONFIG_AFS_DEBUG_CURSOR) || count > 3)
299		return;
300	count++;
301
302	rcu_read_lock();
303	pr_notice("EDESTADDR occurred\n");
304	pr_notice("CELL: %s err=%d\n", cell->name, cell->error);
305	pr_notice("DNS: src=%u st=%u lc=%x\n",
306		  cell->dns_source, cell->dns_status, cell->dns_lookup_count);
307	pr_notice("VC: ut=%lx ix=%u ni=%hu fl=%hx err=%hd\n",
308		  vc->untried, vc->index, vc->nr_iterations, vc->flags, vc->error);
309
310	if (vc->server_list) {
311		const struct afs_vlserver_list *sl = vc->server_list;
312		pr_notice("VC: SL nr=%u ix=%u\n",
313			  sl->nr_servers, sl->index);
314		for (i = 0; i < sl->nr_servers; i++) {
315			const struct afs_vlserver *s = sl->servers[i].server;
316			pr_notice("VC: server %s+%hu fl=%lx E=%hd\n",
317				  s->name, s->port, s->flags, s->probe.error);
318			if (s->addresses) {
319				const struct afs_addr_list *a =
320					rcu_dereference(s->addresses);
321				pr_notice("VC:  - nr=%u/%u/%u pf=%u\n",
322					  a->nr_ipv4, a->nr_addrs, a->max_addrs,
323					  a->preferred);
324				pr_notice("VC:  - R=%lx F=%lx\n",
325					  a->responded, a->failed);
326				if (a == vc->ac.alist)
327					pr_notice("VC:  - current\n");
328			}
329		}
330	}
331
332	pr_notice("AC: t=%lx ax=%u ac=%d er=%d r=%u ni=%u\n",
333		  vc->ac.tried, vc->ac.index, vc->ac.abort_code, vc->ac.error,
334		  vc->ac.responded, vc->ac.nr_iterations);
335	rcu_read_unlock();
336}
337
338/*
339 * Tidy up a volume location server cursor and unlock the vnode.
340 */
341int afs_end_vlserver_operation(struct afs_vl_cursor *vc)
342{
343	struct afs_net *net = vc->cell->net;
344
345	if (vc->error == -EDESTADDRREQ ||
346	    vc->error == -EADDRNOTAVAIL ||
347	    vc->error == -ENETUNREACH ||
348	    vc->error == -EHOSTUNREACH)
349		afs_vl_dump_edestaddrreq(vc);
350
351	afs_end_cursor(&vc->ac);
352	afs_put_vlserverlist(net, vc->server_list);
353
354	if (vc->error == -ECONNABORTED)
355		vc->error = afs_abort_to_error(vc->ac.abort_code);
356
357	return vc->error;
358}
359