xref: /kernel/linux/linux-5.10/fs/afs/fs_probe.c (revision 8c2ecf20)
1// SPDX-License-Identifier: GPL-2.0-or-later
2/* AFS fileserver probing
3 *
4 * Copyright (C) 2018, 2020 Red Hat, Inc. All Rights Reserved.
5 * Written by David Howells (dhowells@redhat.com)
6 */
7
8#include <linux/sched.h>
9#include <linux/slab.h>
10#include "afs_fs.h"
11#include "internal.h"
12#include "protocol_yfs.h"
13
/* Probe polling intervals, in jiffies.  Servers on the "fast" queue (those
 * that failed to respond to their last probe) are re-probed every 30s;
 * servers on the "slow" queue (responsive ones) every 5 minutes.
 */
static unsigned int afs_fs_probe_fast_poll_interval = 30 * HZ;
static unsigned int afs_fs_probe_slow_poll_interval = 5 * 60 * HZ;
16
17/*
18 * Start the probe polling timer.  We have to supply it with an inc on the
19 * outstanding server count.
20 */
21static void afs_schedule_fs_probe(struct afs_net *net,
22				  struct afs_server *server, bool fast)
23{
24	unsigned long atj;
25
26	if (!net->live)
27		return;
28
29	atj = server->probed_at;
30	atj += fast ? afs_fs_probe_fast_poll_interval : afs_fs_probe_slow_poll_interval;
31
32	afs_inc_servers_outstanding(net);
33	if (timer_reduce(&net->fs_probe_timer, atj))
34		afs_dec_servers_outstanding(net);
35}
36
37/*
38 * Handle the completion of a set of probes.
39 */
40static void afs_finished_fs_probe(struct afs_net *net, struct afs_server *server)
41{
42	bool responded = server->probe.responded;
43
44	write_seqlock(&net->fs_lock);
45	if (responded) {
46		list_add_tail(&server->probe_link, &net->fs_probe_slow);
47	} else {
48		server->rtt = UINT_MAX;
49		clear_bit(AFS_SERVER_FL_RESPONDING, &server->flags);
50		list_add_tail(&server->probe_link, &net->fs_probe_fast);
51	}
52	write_sequnlock(&net->fs_lock);
53
54	afs_schedule_fs_probe(net, server, !responded);
55}
56
57/*
58 * Handle the completion of a probe.
59 */
60static void afs_done_one_fs_probe(struct afs_net *net, struct afs_server *server)
61{
62	_enter("");
63
64	if (atomic_dec_and_test(&server->probe_outstanding))
65		afs_finished_fs_probe(net, server);
66
67	wake_up_all(&server->probe_wq);
68}
69
70/*
71 * Handle inability to send a probe due to ENOMEM when trying to allocate a
72 * call struct.
73 */
74static void afs_fs_probe_not_done(struct afs_net *net,
75				  struct afs_server *server,
76				  struct afs_addr_cursor *ac)
77{
78	struct afs_addr_list *alist = ac->alist;
79	unsigned int index = ac->index;
80
81	_enter("");
82
83	trace_afs_io_error(0, -ENOMEM, afs_io_error_fs_probe_fail);
84	spin_lock(&server->probe_lock);
85
86	server->probe.local_failure = true;
87	if (server->probe.error == 0)
88		server->probe.error = -ENOMEM;
89
90	set_bit(index, &alist->failed);
91
92	spin_unlock(&server->probe_lock);
93	return afs_done_one_fs_probe(net, server);
94}
95
96/*
97 * Process the result of probing a fileserver.  This is called after successful
98 * or failed delivery of an FS.GetCapabilities operation.
99 */
void afs_fileserver_probe_result(struct afs_call *call)
{
	struct afs_addr_list *alist = call->alist;
	struct afs_server *server = call->server;
	unsigned int index = call->addr_ix;
	unsigned int rtt_us = 0;
	int ret = call->error;

	_enter("%pU,%u", &server->uuid, index);

	spin_lock(&server->probe_lock);

	switch (ret) {
	case 0:
		/* Clean reply: discard any error recorded from another
		 * address of this server.
		 */
		server->probe.error = 0;
		goto responded;
	case -ECONNABORTED:
		/* The peer is up but aborted the call.  Only keep the abort
		 * code if no address has yet responded cleanly.
		 */
		if (!server->probe.responded) {
			server->probe.abort_code = call->abort_code;
			server->probe.error = ret;
		}
		goto responded;
	case -ENOMEM:
	case -ENONET:
		/* Local failure - says nothing about the server itself, so
		 * don't mark the address as failed.
		 */
		clear_bit(index, &alist->responded);
		server->probe.local_failure = true;
		trace_afs_io_error(call->debug_id, ret, afs_io_error_fs_probe_fail);
		goto out;
	case -ECONNRESET: /* Responded, but call expired. */
	case -ERFKILL:
	case -EADDRNOTAVAIL:
	case -ENETUNREACH:
	case -EHOSTUNREACH:
	case -EHOSTDOWN:
	case -ECONNREFUSED:
	case -ETIMEDOUT:
	case -ETIME:
	default:
		/* Mark this address failed.  Record the error unless a better
		 * address already responded; any error may overwrite a prior
		 * timeout, but not vice versa.
		 */
		clear_bit(index, &alist->responded);
		set_bit(index, &alist->failed);
		if (!server->probe.responded &&
		    (server->probe.error == 0 ||
		     server->probe.error == -ETIMEDOUT ||
		     server->probe.error == -ETIME))
			server->probe.error = ret;
		trace_afs_io_error(call->debug_id, ret, afs_io_error_fs_probe_fail);
		goto out;
	}

responded:
	clear_bit(index, &alist->failed);

	/* Note which service variant answered on this address so future calls
	 * use the right service ID.
	 */
	if (call->service_id == YFS_FS_SERVICE) {
		server->probe.is_yfs = true;
		set_bit(AFS_SERVER_FL_IS_YFS, &server->flags);
		alist->addrs[index].srx_service = call->service_id;
	} else {
		server->probe.not_yfs = true;
		/* Don't let a plain-AFS reply downgrade a YFS marking made by
		 * another address.
		 */
		if (!server->probe.is_yfs) {
			clear_bit(AFS_SERVER_FL_IS_YFS, &server->flags);
			alist->addrs[index].srx_service = call->service_id;
		}
	}

	/* Prefer whichever address has the smallest smoothed RTT so far. */
	rxrpc_kernel_get_srtt(call->net->socket, call->rxcall, &rtt_us);
	if (rtt_us < server->probe.rtt) {
		server->probe.rtt = rtt_us;
		server->rtt = rtt_us;
		alist->preferred = index;
	}

	smp_wmb(); /* Set rtt before responded. */
	server->probe.responded = true;
	set_bit(index, &alist->responded);
	set_bit(AFS_SERVER_FL_RESPONDING, &server->flags);
out:
	spin_unlock(&server->probe_lock);

	_debug("probe %pU [%u] %pISpc rtt=%u ret=%d",
	       &server->uuid, index, &alist->addrs[index].transport,
	       rtt_us, ret);

	return afs_done_one_fs_probe(call->net, server);
}
184
185/*
186 * Probe one or all of a fileserver's addresses to find out the best route and
187 * to query its capabilities.
188 */
void afs_fs_probe_fileserver(struct afs_net *net, struct afs_server *server,
			     struct key *key, bool all)
{
	struct afs_addr_cursor ac = {
		.index = 0,
	};

	_enter("%pU", &server->uuid);

	/* Pin the address list; fs_lock protects the ->addresses pointer. */
	read_lock(&server->fs_lock);
	ac.alist = rcu_dereference_protected(server->addresses,
					     lockdep_is_held(&server->fs_lock));
	afs_get_addrlist(ac.alist);
	read_unlock(&server->fs_lock);

	/* Set the outstanding count and reset the probe record before any
	 * results can come in.
	 */
	server->probed_at = jiffies;
	atomic_set(&server->probe_outstanding, all ? ac.alist->nr_addrs : 1);
	memset(&server->probe, 0, sizeof(server->probe));
	server->probe.rtt = UINT_MAX;

	/* If there's no valid preferred address, probe all of them.  NOTE:
	 * the "< 0" arm assumes ac.index is a signed type - verify against
	 * the afs_addr_cursor definition in internal.h.
	 */
	ac.index = ac.alist->preferred;
	if (ac.index < 0 || ac.index >= ac.alist->nr_addrs)
		all = true;

	if (all) {
		for (ac.index = 0; ac.index < ac.alist->nr_addrs; ac.index++)
			if (!afs_fs_get_capabilities(net, server, &ac, key))
				afs_fs_probe_not_done(net, server, &ac);
	} else {
		if (!afs_fs_get_capabilities(net, server, &ac, key))
			afs_fs_probe_not_done(net, server, &ac);
	}

	afs_put_addrlist(ac.alist);
}
224
225/*
226 * Wait for the first as-yet untried fileserver to respond.
227 */
int afs_wait_for_fs_probes(struct afs_server_list *slist, unsigned long untried)
{
	struct wait_queue_entry *waits;
	struct afs_server *server;
	unsigned int rtt = UINT_MAX, rtt_s;
	bool have_responders = false;
	int pref = -1, i;

	_enter("%u,%lx", slist->nr_servers, untried);

	/* Only wait for servers that have a probe outstanding. */
	for (i = 0; i < slist->nr_servers; i++) {
		if (test_bit(i, &untried)) {
			server = slist->servers[i].server;
			if (!atomic_read(&server->probe_outstanding))
				__clear_bit(i, &untried);
			if (server->probe.responded)
				have_responders = true;
		}
	}
	/* Nothing to wait for if someone already responded or all probes
	 * have completed.
	 */
	if (have_responders || !untried)
		return 0;

	waits = kmalloc(array_size(slist->nr_servers, sizeof(*waits)), GFP_KERNEL);
	if (!waits)
		return -ENOMEM;

	/* Put one waiter on the probe queue of every untried server so that
	 * any of them completing wakes us.
	 */
	for (i = 0; i < slist->nr_servers; i++) {
		if (test_bit(i, &untried)) {
			server = slist->servers[i].server;
			init_waitqueue_entry(&waits[i], current);
			add_wait_queue(&server->probe_wq, &waits[i]);
		}
	}

	/* Sleep until a server responds, all outstanding probes finish, or a
	 * signal arrives.
	 */
	for (;;) {
		bool still_probing = false;

		set_current_state(TASK_INTERRUPTIBLE);
		for (i = 0; i < slist->nr_servers; i++) {
			if (test_bit(i, &untried)) {
				server = slist->servers[i].server;
				if (server->probe.responded)
					goto stop;
				if (atomic_read(&server->probe_outstanding))
					still_probing = true;
			}
		}

		if (!still_probing || signal_pending(current))
			goto stop;
		schedule();
	}

stop:
	set_current_state(TASK_RUNNING);

	/* Tear down the waiters and pick the responding server with the
	 * lowest RTT as the new preference.
	 */
	for (i = 0; i < slist->nr_servers; i++) {
		if (test_bit(i, &untried)) {
			server = slist->servers[i].server;
			rtt_s = READ_ONCE(server->rtt);
			if (test_bit(AFS_SERVER_FL_RESPONDING, &server->flags) &&
			    rtt_s < rtt) {
				pref = i;
				rtt = rtt_s;
			}

			remove_wait_queue(&server->probe_wq, &waits[i]);
		}
	}

	kfree(waits);

	/* Only report a signal if we found no usable server. */
	if (pref == -1 && signal_pending(current))
		return -ERESTARTSYS;

	if (pref >= 0)
		slist->preferred = pref;
	return 0;
}
308
309/*
310 * Probe timer.  We have an increment on fs_outstanding that we need to pass
311 * along to the work item.
312 */
313void afs_fs_probe_timer(struct timer_list *timer)
314{
315	struct afs_net *net = container_of(timer, struct afs_net, fs_probe_timer);
316
317	if (!net->live || !queue_work(afs_wq, &net->fs_prober))
318		afs_dec_servers_outstanding(net);
319}
320
321/*
322 * Dispatch a probe to a server.
323 */
static void afs_dispatch_fs_probe(struct afs_net *net, struct afs_server *server, bool all)
	__releases(&net->fs_lock)
{
	/* NOTE(review): key is always NULL here, so the probe appears to be
	 * made without authentication - confirm against
	 * afs_fs_get_capabilities()'s handling of a NULL key.
	 */
	struct key *key = NULL;

	/* We remove it from the queues here - it will be added back to
	 * one of the queues on the completion of the probe.
	 */
	list_del_init(&server->probe_link);

	/* Hold a ref across the probe; the caller's fs_lock is dropped here
	 * (see __releases above) and NOT retaken.
	 */
	afs_get_server(server, afs_server_trace_get_probe);
	write_sequnlock(&net->fs_lock);

	afs_fs_probe_fileserver(net, server, key, all);
	afs_put_server(net, server, afs_server_trace_put_probe);
}
340
341/*
342 * Probe a server immediately without waiting for its due time to come
343 * round.  This is used when all of the addresses have been tried.
344 */
void afs_probe_fileserver(struct afs_net *net, struct afs_server *server)
{
	write_seqlock(&net->fs_lock);
	/* afs_dispatch_fs_probe() drops fs_lock for us, hence the asymmetric
	 * unlock paths.  A server with an empty probe_link is already being
	 * probed, so skip it.
	 */
	if (!list_empty(&server->probe_link))
		return afs_dispatch_fs_probe(net, server, true);
	write_sequnlock(&net->fs_lock);
}
352
353/*
354 * Probe dispatcher to regularly dispatch probes to keep NAT alive.
355 */
356void afs_fs_probe_dispatcher(struct work_struct *work)
357{
358	struct afs_net *net = container_of(work, struct afs_net, fs_prober);
359	struct afs_server *fast, *slow, *server;
360	unsigned long nowj, timer_at, poll_at;
361	bool first_pass = true, set_timer = false;
362
363	if (!net->live) {
364		afs_dec_servers_outstanding(net);
365		return;
366	}
367
368	_enter("");
369
370	if (list_empty(&net->fs_probe_fast) && list_empty(&net->fs_probe_slow)) {
371		afs_dec_servers_outstanding(net);
372		_leave(" [none]");
373		return;
374	}
375
376again:
377	write_seqlock(&net->fs_lock);
378
379	fast = slow = server = NULL;
380	nowj = jiffies;
381	timer_at = nowj + MAX_JIFFY_OFFSET;
382
383	if (!list_empty(&net->fs_probe_fast)) {
384		fast = list_first_entry(&net->fs_probe_fast, struct afs_server, probe_link);
385		poll_at = fast->probed_at + afs_fs_probe_fast_poll_interval;
386		if (time_before(nowj, poll_at)) {
387			timer_at = poll_at;
388			set_timer = true;
389			fast = NULL;
390		}
391	}
392
393	if (!list_empty(&net->fs_probe_slow)) {
394		slow = list_first_entry(&net->fs_probe_slow, struct afs_server, probe_link);
395		poll_at = slow->probed_at + afs_fs_probe_slow_poll_interval;
396		if (time_before(nowj, poll_at)) {
397			if (time_before(poll_at, timer_at))
398			    timer_at = poll_at;
399			set_timer = true;
400			slow = NULL;
401		}
402	}
403
404	server = fast ?: slow;
405	if (server)
406		_debug("probe %pU", &server->uuid);
407
408	if (server && (first_pass || !need_resched())) {
409		afs_dispatch_fs_probe(net, server, server == fast);
410		first_pass = false;
411		goto again;
412	}
413
414	write_sequnlock(&net->fs_lock);
415
416	if (server) {
417		if (!queue_work(afs_wq, &net->fs_prober))
418			afs_dec_servers_outstanding(net);
419		_leave(" [requeue]");
420	} else if (set_timer) {
421		if (timer_reduce(&net->fs_probe_timer, timer_at))
422			afs_dec_servers_outstanding(net);
423		_leave(" [timer]");
424	} else {
425		afs_dec_servers_outstanding(net);
426		_leave(" [quiesce]");
427	}
428}
429
430/*
431 * Wait for a probe on a particular fileserver to complete for 2s.
432 */
int afs_wait_for_one_fs_probe(struct afs_server *server, bool is_intr)
{
	struct wait_queue_entry wait;
	unsigned long timo = 2 * HZ;

	if (atomic_read(&server->probe_outstanding) == 0)
		goto dont_wait;

	/* Sleep until the server responds, all its probes complete, the 2s
	 * budget runs out, or (if interruptible) a signal arrives.
	 */
	init_wait_entry(&wait, 0);
	for (;;) {
		prepare_to_wait_event(&server->probe_wq, &wait,
				      is_intr ? TASK_INTERRUPTIBLE : TASK_UNINTERRUPTIBLE);
		if (timo == 0 ||
		    server->probe.responded ||
		    atomic_read(&server->probe_outstanding) == 0 ||
		    (is_intr && signal_pending(current)))
			break;
		timo = schedule_timeout(timo);
	}

	finish_wait(&server->probe_wq, &wait);

dont_wait:
	/* Exit-condition checks in priority order: a response wins over a
	 * signal, which wins over timeout; otherwise every probe failed.
	 */
	if (server->probe.responded)
		return 0;
	if (is_intr && signal_pending(current))
		return -ERESTARTSYS;
	if (timo == 0)
		return -ETIME;
	return -EDESTADDRREQ;
}
464
465/*
466 * Clean up the probing when the namespace is killed off.
467 */
468void afs_fs_probe_cleanup(struct afs_net *net)
469{
470	if (del_timer_sync(&net->fs_probe_timer))
471		afs_dec_servers_outstanding(net);
472}
473