xref: /kernel/linux/linux-6.6/fs/afs/fs_probe.c (revision 62306a36)
1// SPDX-License-Identifier: GPL-2.0-or-later
2/* AFS fileserver probing
3 *
4 * Copyright (C) 2018, 2020 Red Hat, Inc. All Rights Reserved.
5 * Written by David Howells (dhowells@redhat.com)
6 */
7
8#include <linux/sched.h>
9#include <linux/slab.h>
10#include "afs_fs.h"
11#include "internal.h"
12#include "protocol_afs.h"
13#include "protocol_yfs.h"
14
15static unsigned int afs_fs_probe_fast_poll_interval = 30 * HZ;
16static unsigned int afs_fs_probe_slow_poll_interval = 5 * 60 * HZ;
17
18/*
19 * Start the probe polling timer.  We have to supply it with an inc on the
20 * outstanding server count.
21 */
22static void afs_schedule_fs_probe(struct afs_net *net,
23				  struct afs_server *server, bool fast)
24{
25	unsigned long atj;
26
27	if (!net->live)
28		return;
29
30	atj = server->probed_at;
31	atj += fast ? afs_fs_probe_fast_poll_interval : afs_fs_probe_slow_poll_interval;
32
33	afs_inc_servers_outstanding(net);
34	if (timer_reduce(&net->fs_probe_timer, atj))
35		afs_dec_servers_outstanding(net);
36}
37
38/*
39 * Handle the completion of a set of probes.
40 */
41static void afs_finished_fs_probe(struct afs_net *net, struct afs_server *server)
42{
43	bool responded = server->probe.responded;
44
45	write_seqlock(&net->fs_lock);
46	if (responded) {
47		list_add_tail(&server->probe_link, &net->fs_probe_slow);
48	} else {
49		server->rtt = UINT_MAX;
50		clear_bit(AFS_SERVER_FL_RESPONDING, &server->flags);
51		list_add_tail(&server->probe_link, &net->fs_probe_fast);
52	}
53	write_sequnlock(&net->fs_lock);
54
55	afs_schedule_fs_probe(net, server, !responded);
56}
57
58/*
59 * Handle the completion of a probe.
60 */
61static void afs_done_one_fs_probe(struct afs_net *net, struct afs_server *server)
62{
63	_enter("");
64
65	if (atomic_dec_and_test(&server->probe_outstanding))
66		afs_finished_fs_probe(net, server);
67
68	wake_up_all(&server->probe_wq);
69}
70
71/*
72 * Handle inability to send a probe due to ENOMEM when trying to allocate a
73 * call struct.
74 */
75static void afs_fs_probe_not_done(struct afs_net *net,
76				  struct afs_server *server,
77				  struct afs_addr_cursor *ac)
78{
79	struct afs_addr_list *alist = ac->alist;
80	unsigned int index = ac->index;
81
82	_enter("");
83
84	trace_afs_io_error(0, -ENOMEM, afs_io_error_fs_probe_fail);
85	spin_lock(&server->probe_lock);
86
87	server->probe.local_failure = true;
88	if (server->probe.error == 0)
89		server->probe.error = -ENOMEM;
90
91	set_bit(index, &alist->failed);
92
93	spin_unlock(&server->probe_lock);
94	return afs_done_one_fs_probe(net, server);
95}
96
97/*
98 * Process the result of probing a fileserver.  This is called after successful
99 * or failed delivery of an FS.GetCapabilities operation.
100 */
101void afs_fileserver_probe_result(struct afs_call *call)
102{
103	struct afs_addr_list *alist = call->alist;
104	struct afs_server *server = call->server;
105	unsigned int index = call->addr_ix;
106	unsigned int rtt_us = 0, cap0;
107	int ret = call->error;
108
109	_enter("%pU,%u", &server->uuid, index);
110
111	spin_lock(&server->probe_lock);
112
113	switch (ret) {
114	case 0:
115		server->probe.error = 0;
116		goto responded;
117	case -ECONNABORTED:
118		if (!server->probe.responded) {
119			server->probe.abort_code = call->abort_code;
120			server->probe.error = ret;
121		}
122		goto responded;
123	case -ENOMEM:
124	case -ENONET:
125		clear_bit(index, &alist->responded);
126		server->probe.local_failure = true;
127		trace_afs_io_error(call->debug_id, ret, afs_io_error_fs_probe_fail);
128		goto out;
129	case -ECONNRESET: /* Responded, but call expired. */
130	case -ERFKILL:
131	case -EADDRNOTAVAIL:
132	case -ENETUNREACH:
133	case -EHOSTUNREACH:
134	case -EHOSTDOWN:
135	case -ECONNREFUSED:
136	case -ETIMEDOUT:
137	case -ETIME:
138	default:
139		clear_bit(index, &alist->responded);
140		set_bit(index, &alist->failed);
141		if (!server->probe.responded &&
142		    (server->probe.error == 0 ||
143		     server->probe.error == -ETIMEDOUT ||
144		     server->probe.error == -ETIME))
145			server->probe.error = ret;
146		trace_afs_io_error(call->debug_id, ret, afs_io_error_fs_probe_fail);
147		goto out;
148	}
149
150responded:
151	clear_bit(index, &alist->failed);
152
153	if (call->service_id == YFS_FS_SERVICE) {
154		server->probe.is_yfs = true;
155		set_bit(AFS_SERVER_FL_IS_YFS, &server->flags);
156		alist->addrs[index].srx_service = call->service_id;
157	} else {
158		server->probe.not_yfs = true;
159		if (!server->probe.is_yfs) {
160			clear_bit(AFS_SERVER_FL_IS_YFS, &server->flags);
161			alist->addrs[index].srx_service = call->service_id;
162		}
163		cap0 = ntohl(call->tmp);
164		if (cap0 & AFS3_VICED_CAPABILITY_64BITFILES)
165			set_bit(AFS_SERVER_FL_HAS_FS64, &server->flags);
166		else
167			clear_bit(AFS_SERVER_FL_HAS_FS64, &server->flags);
168	}
169
170	rxrpc_kernel_get_srtt(call->net->socket, call->rxcall, &rtt_us);
171	if (rtt_us < server->probe.rtt) {
172		server->probe.rtt = rtt_us;
173		server->rtt = rtt_us;
174		alist->preferred = index;
175	}
176
177	smp_wmb(); /* Set rtt before responded. */
178	server->probe.responded = true;
179	set_bit(index, &alist->responded);
180	set_bit(AFS_SERVER_FL_RESPONDING, &server->flags);
181out:
182	spin_unlock(&server->probe_lock);
183
184	_debug("probe %pU [%u] %pISpc rtt=%u ret=%d",
185	       &server->uuid, index, &alist->addrs[index].transport,
186	       rtt_us, ret);
187
188	return afs_done_one_fs_probe(call->net, server);
189}
190
191/*
192 * Probe one or all of a fileserver's addresses to find out the best route and
193 * to query its capabilities.
194 */
195void afs_fs_probe_fileserver(struct afs_net *net, struct afs_server *server,
196			     struct key *key, bool all)
197{
198	struct afs_addr_cursor ac = {
199		.index = 0,
200	};
201
202	_enter("%pU", &server->uuid);
203
204	read_lock(&server->fs_lock);
205	ac.alist = rcu_dereference_protected(server->addresses,
206					     lockdep_is_held(&server->fs_lock));
207	afs_get_addrlist(ac.alist);
208	read_unlock(&server->fs_lock);
209
210	server->probed_at = jiffies;
211	atomic_set(&server->probe_outstanding, all ? ac.alist->nr_addrs : 1);
212	memset(&server->probe, 0, sizeof(server->probe));
213	server->probe.rtt = UINT_MAX;
214
215	ac.index = ac.alist->preferred;
216	if (ac.index < 0 || ac.index >= ac.alist->nr_addrs)
217		all = true;
218
219	if (all) {
220		for (ac.index = 0; ac.index < ac.alist->nr_addrs; ac.index++)
221			if (!afs_fs_get_capabilities(net, server, &ac, key))
222				afs_fs_probe_not_done(net, server, &ac);
223	} else {
224		if (!afs_fs_get_capabilities(net, server, &ac, key))
225			afs_fs_probe_not_done(net, server, &ac);
226	}
227
228	afs_put_addrlist(ac.alist);
229}
230
231/*
232 * Wait for the first as-yet untried fileserver to respond.
233 */
234int afs_wait_for_fs_probes(struct afs_server_list *slist, unsigned long untried)
235{
236	struct wait_queue_entry *waits;
237	struct afs_server *server;
238	unsigned int rtt = UINT_MAX, rtt_s;
239	bool have_responders = false;
240	int pref = -1, i;
241
242	_enter("%u,%lx", slist->nr_servers, untried);
243
244	/* Only wait for servers that have a probe outstanding. */
245	for (i = 0; i < slist->nr_servers; i++) {
246		if (test_bit(i, &untried)) {
247			server = slist->servers[i].server;
248			if (!atomic_read(&server->probe_outstanding))
249				__clear_bit(i, &untried);
250			if (server->probe.responded)
251				have_responders = true;
252		}
253	}
254	if (have_responders || !untried)
255		return 0;
256
257	waits = kmalloc(array_size(slist->nr_servers, sizeof(*waits)), GFP_KERNEL);
258	if (!waits)
259		return -ENOMEM;
260
261	for (i = 0; i < slist->nr_servers; i++) {
262		if (test_bit(i, &untried)) {
263			server = slist->servers[i].server;
264			init_waitqueue_entry(&waits[i], current);
265			add_wait_queue(&server->probe_wq, &waits[i]);
266		}
267	}
268
269	for (;;) {
270		bool still_probing = false;
271
272		set_current_state(TASK_INTERRUPTIBLE);
273		for (i = 0; i < slist->nr_servers; i++) {
274			if (test_bit(i, &untried)) {
275				server = slist->servers[i].server;
276				if (server->probe.responded)
277					goto stop;
278				if (atomic_read(&server->probe_outstanding))
279					still_probing = true;
280			}
281		}
282
283		if (!still_probing || signal_pending(current))
284			goto stop;
285		schedule();
286	}
287
288stop:
289	set_current_state(TASK_RUNNING);
290
291	for (i = 0; i < slist->nr_servers; i++) {
292		if (test_bit(i, &untried)) {
293			server = slist->servers[i].server;
294			rtt_s = READ_ONCE(server->rtt);
295			if (test_bit(AFS_SERVER_FL_RESPONDING, &server->flags) &&
296			    rtt_s < rtt) {
297				pref = i;
298				rtt = rtt_s;
299			}
300
301			remove_wait_queue(&server->probe_wq, &waits[i]);
302		}
303	}
304
305	kfree(waits);
306
307	if (pref == -1 && signal_pending(current))
308		return -ERESTARTSYS;
309
310	if (pref >= 0)
311		slist->preferred = pref;
312	return 0;
313}
314
315/*
316 * Probe timer.  We have an increment on fs_outstanding that we need to pass
317 * along to the work item.
318 */
319void afs_fs_probe_timer(struct timer_list *timer)
320{
321	struct afs_net *net = container_of(timer, struct afs_net, fs_probe_timer);
322
323	if (!net->live || !queue_work(afs_wq, &net->fs_prober))
324		afs_dec_servers_outstanding(net);
325}
326
327/*
328 * Dispatch a probe to a server.
329 */
330static void afs_dispatch_fs_probe(struct afs_net *net, struct afs_server *server, bool all)
331	__releases(&net->fs_lock)
332{
333	struct key *key = NULL;
334
335	/* We remove it from the queues here - it will be added back to
336	 * one of the queues on the completion of the probe.
337	 */
338	list_del_init(&server->probe_link);
339
340	afs_get_server(server, afs_server_trace_get_probe);
341	write_sequnlock(&net->fs_lock);
342
343	afs_fs_probe_fileserver(net, server, key, all);
344	afs_put_server(net, server, afs_server_trace_put_probe);
345}
346
347/*
348 * Probe a server immediately without waiting for its due time to come
349 * round.  This is used when all of the addresses have been tried.
350 */
351void afs_probe_fileserver(struct afs_net *net, struct afs_server *server)
352{
353	write_seqlock(&net->fs_lock);
354	if (!list_empty(&server->probe_link))
355		return afs_dispatch_fs_probe(net, server, true);
356	write_sequnlock(&net->fs_lock);
357}
358
359/*
360 * Probe dispatcher to regularly dispatch probes to keep NAT alive.
361 */
362void afs_fs_probe_dispatcher(struct work_struct *work)
363{
364	struct afs_net *net = container_of(work, struct afs_net, fs_prober);
365	struct afs_server *fast, *slow, *server;
366	unsigned long nowj, timer_at, poll_at;
367	bool first_pass = true, set_timer = false;
368
369	if (!net->live) {
370		afs_dec_servers_outstanding(net);
371		return;
372	}
373
374	_enter("");
375
376	if (list_empty(&net->fs_probe_fast) && list_empty(&net->fs_probe_slow)) {
377		afs_dec_servers_outstanding(net);
378		_leave(" [none]");
379		return;
380	}
381
382again:
383	write_seqlock(&net->fs_lock);
384
385	fast = slow = server = NULL;
386	nowj = jiffies;
387	timer_at = nowj + MAX_JIFFY_OFFSET;
388
389	if (!list_empty(&net->fs_probe_fast)) {
390		fast = list_first_entry(&net->fs_probe_fast, struct afs_server, probe_link);
391		poll_at = fast->probed_at + afs_fs_probe_fast_poll_interval;
392		if (time_before(nowj, poll_at)) {
393			timer_at = poll_at;
394			set_timer = true;
395			fast = NULL;
396		}
397	}
398
399	if (!list_empty(&net->fs_probe_slow)) {
400		slow = list_first_entry(&net->fs_probe_slow, struct afs_server, probe_link);
401		poll_at = slow->probed_at + afs_fs_probe_slow_poll_interval;
402		if (time_before(nowj, poll_at)) {
403			if (time_before(poll_at, timer_at))
404			    timer_at = poll_at;
405			set_timer = true;
406			slow = NULL;
407		}
408	}
409
410	server = fast ?: slow;
411	if (server)
412		_debug("probe %pU", &server->uuid);
413
414	if (server && (first_pass || !need_resched())) {
415		afs_dispatch_fs_probe(net, server, server == fast);
416		first_pass = false;
417		goto again;
418	}
419
420	write_sequnlock(&net->fs_lock);
421
422	if (server) {
423		if (!queue_work(afs_wq, &net->fs_prober))
424			afs_dec_servers_outstanding(net);
425		_leave(" [requeue]");
426	} else if (set_timer) {
427		if (timer_reduce(&net->fs_probe_timer, timer_at))
428			afs_dec_servers_outstanding(net);
429		_leave(" [timer]");
430	} else {
431		afs_dec_servers_outstanding(net);
432		_leave(" [quiesce]");
433	}
434}
435
436/*
437 * Wait for a probe on a particular fileserver to complete for 2s.
438 */
439int afs_wait_for_one_fs_probe(struct afs_server *server, bool is_intr)
440{
441	struct wait_queue_entry wait;
442	unsigned long timo = 2 * HZ;
443
444	if (atomic_read(&server->probe_outstanding) == 0)
445		goto dont_wait;
446
447	init_wait_entry(&wait, 0);
448	for (;;) {
449		prepare_to_wait_event(&server->probe_wq, &wait,
450				      is_intr ? TASK_INTERRUPTIBLE : TASK_UNINTERRUPTIBLE);
451		if (timo == 0 ||
452		    server->probe.responded ||
453		    atomic_read(&server->probe_outstanding) == 0 ||
454		    (is_intr && signal_pending(current)))
455			break;
456		timo = schedule_timeout(timo);
457	}
458
459	finish_wait(&server->probe_wq, &wait);
460
461dont_wait:
462	if (server->probe.responded)
463		return 0;
464	if (is_intr && signal_pending(current))
465		return -ERESTARTSYS;
466	if (timo == 0)
467		return -ETIME;
468	return -EDESTADDRREQ;
469}
470
471/*
472 * Clean up the probing when the namespace is killed off.
473 */
474void afs_fs_probe_cleanup(struct afs_net *net)
475{
476	if (del_timer_sync(&net->fs_probe_timer))
477		afs_dec_servers_outstanding(net);
478}
479