xref: /kernel/linux/linux-5.10/fs/afs/server.c (revision 8c2ecf20)
1// SPDX-License-Identifier: GPL-2.0-or-later
2/* AFS server record management
3 *
4 * Copyright (C) 2002, 2007 Red Hat, Inc. All Rights Reserved.
5 * Written by David Howells (dhowells@redhat.com)
6 */
7
8#include <linux/sched.h>
9#include <linux/slab.h>
10#include "afs_fs.h"
11#include "internal.h"
12#include "protocol_yfs.h"
13
14static unsigned afs_server_gc_delay = 10;	/* Server record timeout in seconds */
15static atomic_t afs_server_debug_id;
16
17static struct afs_server *afs_maybe_use_server(struct afs_server *,
18					       enum afs_server_trace);
19static void __afs_put_server(struct afs_net *, struct afs_server *);
20
21/*
22 * Find a server by one of its addresses.
23 */
24struct afs_server *afs_find_server(struct afs_net *net,
25				   const struct sockaddr_rxrpc *srx)
26{
27	const struct afs_addr_list *alist;
28	struct afs_server *server = NULL;
29	unsigned int i;
30	int seq = 1, diff;
31
32	rcu_read_lock();
33
34	do {
35		if (server)
36			afs_unuse_server_notime(net, server, afs_server_trace_put_find_rsq);
37		server = NULL;
38		seq++; /* 2 on the 1st/lockless path, otherwise odd */
39		read_seqbegin_or_lock(&net->fs_addr_lock, &seq);
40
41		if (srx->transport.family == AF_INET6) {
42			const struct sockaddr_in6 *a = &srx->transport.sin6, *b;
43			hlist_for_each_entry_rcu(server, &net->fs_addresses6, addr6_link) {
44				alist = rcu_dereference(server->addresses);
45				for (i = alist->nr_ipv4; i < alist->nr_addrs; i++) {
46					b = &alist->addrs[i].transport.sin6;
47					diff = ((u16 __force)a->sin6_port -
48						(u16 __force)b->sin6_port);
49					if (diff == 0)
50						diff = memcmp(&a->sin6_addr,
51							      &b->sin6_addr,
52							      sizeof(struct in6_addr));
53					if (diff == 0)
54						goto found;
55				}
56			}
57		} else {
58			const struct sockaddr_in *a = &srx->transport.sin, *b;
59			hlist_for_each_entry_rcu(server, &net->fs_addresses4, addr4_link) {
60				alist = rcu_dereference(server->addresses);
61				for (i = 0; i < alist->nr_ipv4; i++) {
62					b = &alist->addrs[i].transport.sin;
63					diff = ((u16 __force)a->sin_port -
64						(u16 __force)b->sin_port);
65					if (diff == 0)
66						diff = ((u32 __force)a->sin_addr.s_addr -
67							(u32 __force)b->sin_addr.s_addr);
68					if (diff == 0)
69						goto found;
70				}
71			}
72		}
73
74		server = NULL;
75		continue;
76	found:
77		server = afs_maybe_use_server(server, afs_server_trace_get_by_addr);
78
79	} while (need_seqretry(&net->fs_addr_lock, seq));
80
81	done_seqretry(&net->fs_addr_lock, seq);
82
83	rcu_read_unlock();
84	return server;
85}
86
87/*
88 * Look up a server by its UUID and mark it active.
89 */
90struct afs_server *afs_find_server_by_uuid(struct afs_net *net, const uuid_t *uuid)
91{
92	struct afs_server *server = NULL;
93	struct rb_node *p;
94	int diff, seq = 1;
95
96	_enter("%pU", uuid);
97
98	do {
99		/* Unfortunately, rbtree walking doesn't give reliable results
100		 * under just the RCU read lock, so we have to check for
101		 * changes.
102		 */
103		if (server)
104			afs_unuse_server(net, server, afs_server_trace_put_uuid_rsq);
105		server = NULL;
106		seq++; /* 2 on the 1st/lockless path, otherwise odd */
107		read_seqbegin_or_lock(&net->fs_lock, &seq);
108
109		p = net->fs_servers.rb_node;
110		while (p) {
111			server = rb_entry(p, struct afs_server, uuid_rb);
112
113			diff = memcmp(uuid, &server->uuid, sizeof(*uuid));
114			if (diff < 0) {
115				p = p->rb_left;
116			} else if (diff > 0) {
117				p = p->rb_right;
118			} else {
119				afs_use_server(server, afs_server_trace_get_by_uuid);
120				break;
121			}
122
123			server = NULL;
124		}
125	} while (need_seqretry(&net->fs_lock, seq));
126
127	done_seqretry(&net->fs_lock, seq);
128
129	_leave(" = %p", server);
130	return server;
131}
132
133/*
134 * Install a server record in the namespace tree.  If there's a clash, we stick
135 * it into a list anchored on whichever afs_server struct is actually in the
136 * tree.
137 */
138static struct afs_server *afs_install_server(struct afs_cell *cell,
139					     struct afs_server *candidate)
140{
141	const struct afs_addr_list *alist;
142	struct afs_server *server, *next;
143	struct afs_net *net = cell->net;
144	struct rb_node **pp, *p;
145	int diff;
146
147	_enter("%p", candidate);
148
149	write_seqlock(&net->fs_lock);
150
151	/* Firstly install the server in the UUID lookup tree */
152	pp = &net->fs_servers.rb_node;
153	p = NULL;
154	while (*pp) {
155		p = *pp;
156		_debug("- consider %p", p);
157		server = rb_entry(p, struct afs_server, uuid_rb);
158		diff = memcmp(&candidate->uuid, &server->uuid, sizeof(uuid_t));
159		if (diff < 0) {
160			pp = &(*pp)->rb_left;
161		} else if (diff > 0) {
162			pp = &(*pp)->rb_right;
163		} else {
164			if (server->cell == cell)
165				goto exists;
166
167			/* We have the same UUID representing servers in
168			 * different cells.  Append the new server to the list.
169			 */
170			for (;;) {
171				next = rcu_dereference_protected(
172					server->uuid_next,
173					lockdep_is_held(&net->fs_lock.lock));
174				if (!next)
175					break;
176				server = next;
177			}
178			rcu_assign_pointer(server->uuid_next, candidate);
179			candidate->uuid_prev = server;
180			server = candidate;
181			goto added_dup;
182		}
183	}
184
185	server = candidate;
186	rb_link_node(&server->uuid_rb, p, pp);
187	rb_insert_color(&server->uuid_rb, &net->fs_servers);
188	hlist_add_head_rcu(&server->proc_link, &net->fs_proc);
189
190added_dup:
191	write_seqlock(&net->fs_addr_lock);
192	alist = rcu_dereference_protected(server->addresses,
193					  lockdep_is_held(&net->fs_addr_lock.lock));
194
195	/* Secondly, if the server has any IPv4 and/or IPv6 addresses, install
196	 * it in the IPv4 and/or IPv6 reverse-map lists.
197	 *
198	 * TODO: For speed we want to use something other than a flat list
199	 * here; even sorting the list in terms of lowest address would help a
200	 * bit, but anything we might want to do gets messy and memory
201	 * intensive.
202	 */
203	if (alist->nr_ipv4 > 0)
204		hlist_add_head_rcu(&server->addr4_link, &net->fs_addresses4);
205	if (alist->nr_addrs > alist->nr_ipv4)
206		hlist_add_head_rcu(&server->addr6_link, &net->fs_addresses6);
207
208	write_sequnlock(&net->fs_addr_lock);
209
210exists:
211	afs_get_server(server, afs_server_trace_get_install);
212	write_sequnlock(&net->fs_lock);
213	return server;
214}
215
216/*
217 * Allocate a new server record and mark it active.
218 */
219static struct afs_server *afs_alloc_server(struct afs_cell *cell,
220					   const uuid_t *uuid,
221					   struct afs_addr_list *alist)
222{
223	struct afs_server *server;
224	struct afs_net *net = cell->net;
225
226	_enter("");
227
228	server = kzalloc(sizeof(struct afs_server), GFP_KERNEL);
229	if (!server)
230		goto enomem;
231
232	atomic_set(&server->ref, 1);
233	atomic_set(&server->active, 1);
234	server->debug_id = atomic_inc_return(&afs_server_debug_id);
235	RCU_INIT_POINTER(server->addresses, alist);
236	server->addr_version = alist->version;
237	server->uuid = *uuid;
238	rwlock_init(&server->fs_lock);
239	init_waitqueue_head(&server->probe_wq);
240	INIT_LIST_HEAD(&server->probe_link);
241	spin_lock_init(&server->probe_lock);
242	server->cell = cell;
243	server->rtt = UINT_MAX;
244
245	afs_inc_servers_outstanding(net);
246	trace_afs_server(server, 1, 1, afs_server_trace_alloc);
247	_leave(" = %p", server);
248	return server;
249
250enomem:
251	_leave(" = NULL [nomem]");
252	return NULL;
253}
254
255/*
256 * Look up an address record for a server
257 */
258static struct afs_addr_list *afs_vl_lookup_addrs(struct afs_cell *cell,
259						 struct key *key, const uuid_t *uuid)
260{
261	struct afs_vl_cursor vc;
262	struct afs_addr_list *alist = NULL;
263	int ret;
264
265	ret = -ERESTARTSYS;
266	if (afs_begin_vlserver_operation(&vc, cell, key)) {
267		while (afs_select_vlserver(&vc)) {
268			if (test_bit(AFS_VLSERVER_FL_IS_YFS, &vc.server->flags))
269				alist = afs_yfsvl_get_endpoints(&vc, uuid);
270			else
271				alist = afs_vl_get_addrs_u(&vc, uuid);
272		}
273
274		ret = afs_end_vlserver_operation(&vc);
275	}
276
277	return ret < 0 ? ERR_PTR(ret) : alist;
278}
279
280/*
281 * Get or create a fileserver record.
282 */
283struct afs_server *afs_lookup_server(struct afs_cell *cell, struct key *key,
284				     const uuid_t *uuid, u32 addr_version)
285{
286	struct afs_addr_list *alist;
287	struct afs_server *server, *candidate;
288
289	_enter("%p,%pU", cell->net, uuid);
290
291	server = afs_find_server_by_uuid(cell->net, uuid);
292	if (server) {
293		if (server->addr_version != addr_version)
294			set_bit(AFS_SERVER_FL_NEEDS_UPDATE, &server->flags);
295		return server;
296	}
297
298	alist = afs_vl_lookup_addrs(cell, key, uuid);
299	if (IS_ERR(alist))
300		return ERR_CAST(alist);
301
302	candidate = afs_alloc_server(cell, uuid, alist);
303	if (!candidate) {
304		afs_put_addrlist(alist);
305		return ERR_PTR(-ENOMEM);
306	}
307
308	server = afs_install_server(cell, candidate);
309	if (server != candidate) {
310		afs_put_addrlist(alist);
311		kfree(candidate);
312	} else {
313		/* Immediately dispatch an asynchronous probe to each interface
314		 * on the fileserver.  This will make sure the repeat-probing
315		 * service is started.
316		 */
317		afs_fs_probe_fileserver(cell->net, server, key, true);
318	}
319
320	return server;
321}
322
323/*
324 * Set the server timer to fire after a given delay, assuming it's not already
325 * set for an earlier time.
326 */
327static void afs_set_server_timer(struct afs_net *net, time64_t delay)
328{
329	if (net->live) {
330		afs_inc_servers_outstanding(net);
331		if (timer_reduce(&net->fs_timer, jiffies + delay * HZ))
332			afs_dec_servers_outstanding(net);
333	}
334}
335
336/*
337 * Server management timer.  We have an increment on fs_outstanding that we
338 * need to pass along to the work item.
339 */
340void afs_servers_timer(struct timer_list *timer)
341{
342	struct afs_net *net = container_of(timer, struct afs_net, fs_timer);
343
344	_enter("");
345	if (!queue_work(afs_wq, &net->fs_manager))
346		afs_dec_servers_outstanding(net);
347}
348
349/*
350 * Get a reference on a server object.
351 */
352struct afs_server *afs_get_server(struct afs_server *server,
353				  enum afs_server_trace reason)
354{
355	unsigned int u = atomic_inc_return(&server->ref);
356
357	trace_afs_server(server, u, atomic_read(&server->active), reason);
358	return server;
359}
360
361/*
362 * Try to get a reference on a server object.
363 */
364static struct afs_server *afs_maybe_use_server(struct afs_server *server,
365					       enum afs_server_trace reason)
366{
367	unsigned int r = atomic_fetch_add_unless(&server->ref, 1, 0);
368	unsigned int a;
369
370	if (r == 0)
371		return NULL;
372
373	a = atomic_inc_return(&server->active);
374	trace_afs_server(server, r, a, reason);
375	return server;
376}
377
378/*
379 * Get an active count on a server object.
380 */
381struct afs_server *afs_use_server(struct afs_server *server, enum afs_server_trace reason)
382{
383	unsigned int r = atomic_inc_return(&server->ref);
384	unsigned int a = atomic_inc_return(&server->active);
385
386	trace_afs_server(server, r, a, reason);
387	return server;
388}
389
390/*
391 * Release a reference on a server record.
392 */
393void afs_put_server(struct afs_net *net, struct afs_server *server,
394		    enum afs_server_trace reason)
395{
396	unsigned int usage;
397
398	if (!server)
399		return;
400
401	usage = atomic_dec_return(&server->ref);
402	trace_afs_server(server, usage, atomic_read(&server->active), reason);
403	if (unlikely(usage == 0))
404		__afs_put_server(net, server);
405}
406
407/*
408 * Drop an active count on a server object without updating the last-unused
409 * time.
410 */
411void afs_unuse_server_notime(struct afs_net *net, struct afs_server *server,
412			     enum afs_server_trace reason)
413{
414	if (server) {
415		unsigned int active = atomic_dec_return(&server->active);
416
417		if (active == 0)
418			afs_set_server_timer(net, afs_server_gc_delay);
419		afs_put_server(net, server, reason);
420	}
421}
422
423/*
424 * Drop an active count on a server object.
425 */
426void afs_unuse_server(struct afs_net *net, struct afs_server *server,
427		      enum afs_server_trace reason)
428{
429	if (server) {
430		server->unuse_time = ktime_get_real_seconds();
431		afs_unuse_server_notime(net, server, reason);
432	}
433}
434
435static void afs_server_rcu(struct rcu_head *rcu)
436{
437	struct afs_server *server = container_of(rcu, struct afs_server, rcu);
438
439	trace_afs_server(server, atomic_read(&server->ref),
440			 atomic_read(&server->active), afs_server_trace_free);
441	afs_put_addrlist(rcu_access_pointer(server->addresses));
442	kfree(server);
443}
444
445static void __afs_put_server(struct afs_net *net, struct afs_server *server)
446{
447	call_rcu(&server->rcu, afs_server_rcu);
448	afs_dec_servers_outstanding(net);
449}
450
451static void afs_give_up_callbacks(struct afs_net *net, struct afs_server *server)
452{
453	struct afs_addr_list *alist = rcu_access_pointer(server->addresses);
454	struct afs_addr_cursor ac = {
455		.alist	= alist,
456		.index	= alist->preferred,
457		.error	= 0,
458	};
459
460	afs_fs_give_up_all_callbacks(net, server, &ac, NULL);
461}
462
463/*
464 * destroy a dead server
465 */
466static void afs_destroy_server(struct afs_net *net, struct afs_server *server)
467{
468	if (test_bit(AFS_SERVER_FL_MAY_HAVE_CB, &server->flags))
469		afs_give_up_callbacks(net, server);
470
471	afs_put_server(net, server, afs_server_trace_destroy);
472}
473
474/*
475 * Garbage collect any expired servers.
476 */
477static void afs_gc_servers(struct afs_net *net, struct afs_server *gc_list)
478{
479	struct afs_server *server, *next, *prev;
480	int active;
481
482	while ((server = gc_list)) {
483		gc_list = server->gc_next;
484
485		write_seqlock(&net->fs_lock);
486
487		active = atomic_read(&server->active);
488		if (active == 0) {
489			trace_afs_server(server, atomic_read(&server->ref),
490					 active, afs_server_trace_gc);
491			next = rcu_dereference_protected(
492				server->uuid_next, lockdep_is_held(&net->fs_lock.lock));
493			prev = server->uuid_prev;
494			if (!prev) {
495				/* The one at the front is in the tree */
496				if (!next) {
497					rb_erase(&server->uuid_rb, &net->fs_servers);
498				} else {
499					rb_replace_node_rcu(&server->uuid_rb,
500							    &next->uuid_rb,
501							    &net->fs_servers);
502					next->uuid_prev = NULL;
503				}
504			} else {
505				/* This server is not at the front */
506				rcu_assign_pointer(prev->uuid_next, next);
507				if (next)
508					next->uuid_prev = prev;
509			}
510
511			list_del(&server->probe_link);
512			hlist_del_rcu(&server->proc_link);
513			if (!hlist_unhashed(&server->addr4_link))
514				hlist_del_rcu(&server->addr4_link);
515			if (!hlist_unhashed(&server->addr6_link))
516				hlist_del_rcu(&server->addr6_link);
517		}
518		write_sequnlock(&net->fs_lock);
519
520		if (active == 0)
521			afs_destroy_server(net, server);
522	}
523}
524
525/*
526 * Manage the records of servers known to be within a network namespace.  This
527 * includes garbage collecting unused servers.
528 *
529 * Note also that we were given an increment on net->servers_outstanding by
530 * whoever queued us that we need to deal with before returning.
531 */
532void afs_manage_servers(struct work_struct *work)
533{
534	struct afs_net *net = container_of(work, struct afs_net, fs_manager);
535	struct afs_server *gc_list = NULL;
536	struct rb_node *cursor;
537	time64_t now = ktime_get_real_seconds(), next_manage = TIME64_MAX;
538	bool purging = !net->live;
539
540	_enter("");
541
542	/* Trawl the server list looking for servers that have expired from
543	 * lack of use.
544	 */
545	read_seqlock_excl(&net->fs_lock);
546
547	for (cursor = rb_first(&net->fs_servers); cursor; cursor = rb_next(cursor)) {
548		struct afs_server *server =
549			rb_entry(cursor, struct afs_server, uuid_rb);
550		int active = atomic_read(&server->active);
551
552		_debug("manage %pU %u", &server->uuid, active);
553
554		if (purging) {
555			trace_afs_server(server, atomic_read(&server->ref),
556					 active, afs_server_trace_purging);
557			if (active != 0)
558				pr_notice("Can't purge s=%08x\n", server->debug_id);
559		}
560
561		if (active == 0) {
562			time64_t expire_at = server->unuse_time;
563
564			if (!test_bit(AFS_SERVER_FL_VL_FAIL, &server->flags) &&
565			    !test_bit(AFS_SERVER_FL_NOT_FOUND, &server->flags))
566				expire_at += afs_server_gc_delay;
567			if (purging || expire_at <= now) {
568				server->gc_next = gc_list;
569				gc_list = server;
570			} else if (expire_at < next_manage) {
571				next_manage = expire_at;
572			}
573		}
574	}
575
576	read_sequnlock_excl(&net->fs_lock);
577
578	/* Update the timer on the way out.  We have to pass an increment on
579	 * servers_outstanding in the namespace that we are in to the timer or
580	 * the work scheduler.
581	 */
582	if (!purging && next_manage < TIME64_MAX) {
583		now = ktime_get_real_seconds();
584
585		if (next_manage - now <= 0) {
586			if (queue_work(afs_wq, &net->fs_manager))
587				afs_inc_servers_outstanding(net);
588		} else {
589			afs_set_server_timer(net, next_manage - now);
590		}
591	}
592
593	afs_gc_servers(net, gc_list);
594
595	afs_dec_servers_outstanding(net);
596	_leave(" [%d]", atomic_read(&net->servers_outstanding));
597}
598
599static void afs_queue_server_manager(struct afs_net *net)
600{
601	afs_inc_servers_outstanding(net);
602	if (!queue_work(afs_wq, &net->fs_manager))
603		afs_dec_servers_outstanding(net);
604}
605
606/*
607 * Purge list of servers.
608 */
609void afs_purge_servers(struct afs_net *net)
610{
611	_enter("");
612
613	if (del_timer_sync(&net->fs_timer))
614		afs_dec_servers_outstanding(net);
615
616	afs_queue_server_manager(net);
617
618	_debug("wait");
619	atomic_dec(&net->servers_outstanding);
620	wait_var_event(&net->servers_outstanding,
621		       !atomic_read(&net->servers_outstanding));
622	_leave("");
623}
624
625/*
626 * Get an update for a server's address list.
627 */
628static noinline bool afs_update_server_record(struct afs_operation *op,
629					      struct afs_server *server)
630{
631	struct afs_addr_list *alist, *discard;
632
633	_enter("");
634
635	trace_afs_server(server, atomic_read(&server->ref), atomic_read(&server->active),
636			 afs_server_trace_update);
637
638	alist = afs_vl_lookup_addrs(op->volume->cell, op->key, &server->uuid);
639	if (IS_ERR(alist)) {
640		if ((PTR_ERR(alist) == -ERESTARTSYS ||
641		     PTR_ERR(alist) == -EINTR) &&
642		    (op->flags & AFS_OPERATION_UNINTR) &&
643		    server->addresses) {
644			_leave(" = t [intr]");
645			return true;
646		}
647		op->error = PTR_ERR(alist);
648		_leave(" = f [%d]", op->error);
649		return false;
650	}
651
652	discard = alist;
653	if (server->addr_version != alist->version) {
654		write_lock(&server->fs_lock);
655		discard = rcu_dereference_protected(server->addresses,
656						    lockdep_is_held(&server->fs_lock));
657		rcu_assign_pointer(server->addresses, alist);
658		server->addr_version = alist->version;
659		write_unlock(&server->fs_lock);
660	}
661
662	afs_put_addrlist(discard);
663	_leave(" = t");
664	return true;
665}
666
667/*
668 * See if a server's address list needs updating.
669 */
670bool afs_check_server_record(struct afs_operation *op, struct afs_server *server)
671{
672	bool success;
673	int ret, retries = 0;
674
675	_enter("");
676
677	ASSERT(server);
678
679retry:
680	if (test_bit(AFS_SERVER_FL_UPDATING, &server->flags))
681		goto wait;
682	if (test_bit(AFS_SERVER_FL_NEEDS_UPDATE, &server->flags))
683		goto update;
684	_leave(" = t [good]");
685	return true;
686
687update:
688	if (!test_and_set_bit_lock(AFS_SERVER_FL_UPDATING, &server->flags)) {
689		clear_bit(AFS_SERVER_FL_NEEDS_UPDATE, &server->flags);
690		success = afs_update_server_record(op, server);
691		clear_bit_unlock(AFS_SERVER_FL_UPDATING, &server->flags);
692		wake_up_bit(&server->flags, AFS_SERVER_FL_UPDATING);
693		_leave(" = %d", success);
694		return success;
695	}
696
697wait:
698	ret = wait_on_bit(&server->flags, AFS_SERVER_FL_UPDATING,
699			  (op->flags & AFS_OPERATION_UNINTR) ?
700			  TASK_UNINTERRUPTIBLE : TASK_INTERRUPTIBLE);
701	if (ret == -ERESTARTSYS) {
702		op->error = ret;
703		_leave(" = f [intr]");
704		return false;
705	}
706
707	retries++;
708	if (retries == 4) {
709		_leave(" = f [stale]");
710		ret = -ESTALE;
711		return false;
712	}
713	goto retry;
714}
715