1// SPDX-License-Identifier: GPL-2.0-or-later 2/* AFS fileserver probing 3 * 4 * Copyright (C) 2018, 2020 Red Hat, Inc. All Rights Reserved. 5 * Written by David Howells (dhowells@redhat.com) 6 */ 7 8#include <linux/sched.h> 9#include <linux/slab.h> 10#include "afs_fs.h" 11#include "internal.h" 12#include "protocol_yfs.h" 13 14static unsigned int afs_fs_probe_fast_poll_interval = 30 * HZ; 15static unsigned int afs_fs_probe_slow_poll_interval = 5 * 60 * HZ; 16 17/* 18 * Start the probe polling timer. We have to supply it with an inc on the 19 * outstanding server count. 20 */ 21static void afs_schedule_fs_probe(struct afs_net *net, 22 struct afs_server *server, bool fast) 23{ 24 unsigned long atj; 25 26 if (!net->live) 27 return; 28 29 atj = server->probed_at; 30 atj += fast ? afs_fs_probe_fast_poll_interval : afs_fs_probe_slow_poll_interval; 31 32 afs_inc_servers_outstanding(net); 33 if (timer_reduce(&net->fs_probe_timer, atj)) 34 afs_dec_servers_outstanding(net); 35} 36 37/* 38 * Handle the completion of a set of probes. 39 */ 40static void afs_finished_fs_probe(struct afs_net *net, struct afs_server *server) 41{ 42 bool responded = server->probe.responded; 43 44 write_seqlock(&net->fs_lock); 45 if (responded) { 46 list_add_tail(&server->probe_link, &net->fs_probe_slow); 47 } else { 48 server->rtt = UINT_MAX; 49 clear_bit(AFS_SERVER_FL_RESPONDING, &server->flags); 50 list_add_tail(&server->probe_link, &net->fs_probe_fast); 51 } 52 write_sequnlock(&net->fs_lock); 53 54 afs_schedule_fs_probe(net, server, !responded); 55} 56 57/* 58 * Handle the completion of a probe. 59 */ 60static void afs_done_one_fs_probe(struct afs_net *net, struct afs_server *server) 61{ 62 _enter(""); 63 64 if (atomic_dec_and_test(&server->probe_outstanding)) 65 afs_finished_fs_probe(net, server); 66 67 wake_up_all(&server->probe_wq); 68} 69 70/* 71 * Handle inability to send a probe due to ENOMEM when trying to allocate a 72 * call struct. 73 */ 74static void afs_fs_probe_not_done(struct afs_net *net, 75 struct afs_server *server, 76 struct afs_addr_cursor *ac) 77{ 78 struct afs_addr_list *alist = ac->alist; 79 unsigned int index = ac->index; 80 81 _enter(""); 82 83 trace_afs_io_error(0, -ENOMEM, afs_io_error_fs_probe_fail); 84 spin_lock(&server->probe_lock); 85 86 server->probe.local_failure = true; 87 if (server->probe.error == 0) 88 server->probe.error = -ENOMEM; 89 90 set_bit(index, &alist->failed); 91 92 spin_unlock(&server->probe_lock); 93 return afs_done_one_fs_probe(net, server); 94} 95 96/* 97 * Process the result of probing a fileserver. This is called after successful 98 * or failed delivery of an FS.GetCapabilities operation. 99 */ 100void afs_fileserver_probe_result(struct afs_call *call) 101{ 102 struct afs_addr_list *alist = call->alist; 103 struct afs_server *server = call->server; 104 unsigned int index = call->addr_ix; 105 unsigned int rtt_us = 0; 106 int ret = call->error; 107 108 _enter("%pU,%u", &server->uuid, index); 109 110 spin_lock(&server->probe_lock); 111 112 switch (ret) { 113 case 0: 114 server->probe.error = 0; 115 goto responded; 116 case -ECONNABORTED: 117 if (!server->probe.responded) { 118 server->probe.abort_code = call->abort_code; 119 server->probe.error = ret; 120 } 121 goto responded; 122 case -ENOMEM: 123 case -ENONET: 124 clear_bit(index, &alist->responded); 125 server->probe.local_failure = true; 126 trace_afs_io_error(call->debug_id, ret, afs_io_error_fs_probe_fail); 127 goto out; 128 case -ECONNRESET: /* Responded, but call expired. */ 129 case -ERFKILL: 130 case -EADDRNOTAVAIL: 131 case -ENETUNREACH: 132 case -EHOSTUNREACH: 133 case -EHOSTDOWN: 134 case -ECONNREFUSED: 135 case -ETIMEDOUT: 136 case -ETIME: 137 default: 138 clear_bit(index, &alist->responded); 139 set_bit(index, &alist->failed); 140 if (!server->probe.responded && 141 (server->probe.error == 0 || 142 server->probe.error == -ETIMEDOUT || 143 server->probe.error == -ETIME)) 144 server->probe.error = ret; 145 trace_afs_io_error(call->debug_id, ret, afs_io_error_fs_probe_fail); 146 goto out; 147 } 148 149responded: 150 clear_bit(index, &alist->failed); 151 152 if (call->service_id == YFS_FS_SERVICE) { 153 server->probe.is_yfs = true; 154 set_bit(AFS_SERVER_FL_IS_YFS, &server->flags); 155 alist->addrs[index].srx_service = call->service_id; 156 } else { 157 server->probe.not_yfs = true; 158 if (!server->probe.is_yfs) { 159 clear_bit(AFS_SERVER_FL_IS_YFS, &server->flags); 160 alist->addrs[index].srx_service = call->service_id; 161 } 162 } 163 164 rxrpc_kernel_get_srtt(call->net->socket, call->rxcall, &rtt_us); 165 if (rtt_us < server->probe.rtt) { 166 server->probe.rtt = rtt_us; 167 server->rtt = rtt_us; 168 alist->preferred = index; 169 } 170 171 smp_wmb(); /* Set rtt before responded. */ 172 server->probe.responded = true; 173 set_bit(index, &alist->responded); 174 set_bit(AFS_SERVER_FL_RESPONDING, &server->flags); 175out: 176 spin_unlock(&server->probe_lock); 177 178 _debug("probe %pU [%u] %pISpc rtt=%u ret=%d", 179 &server->uuid, index, &alist->addrs[index].transport, 180 rtt_us, ret); 181 182 return afs_done_one_fs_probe(call->net, server); 183} 184 185/* 186 * Probe one or all of a fileserver's addresses to find out the best route and 187 * to query its capabilities. 188 */ 189void afs_fs_probe_fileserver(struct afs_net *net, struct afs_server *server, 190 struct key *key, bool all) 191{ 192 struct afs_addr_cursor ac = { 193 .index = 0, 194 }; 195 196 _enter("%pU", &server->uuid); 197 198 read_lock(&server->fs_lock); 199 ac.alist = rcu_dereference_protected(server->addresses, 200 lockdep_is_held(&server->fs_lock)); 201 afs_get_addrlist(ac.alist); 202 read_unlock(&server->fs_lock); 203 204 server->probed_at = jiffies; 205 atomic_set(&server->probe_outstanding, all ? ac.alist->nr_addrs : 1); 206 memset(&server->probe, 0, sizeof(server->probe)); 207 server->probe.rtt = UINT_MAX; 208 209 ac.index = ac.alist->preferred; 210 if (ac.index < 0 || ac.index >= ac.alist->nr_addrs) 211 all = true; 212 213 if (all) { 214 for (ac.index = 0; ac.index < ac.alist->nr_addrs; ac.index++) 215 if (!afs_fs_get_capabilities(net, server, &ac, key)) 216 afs_fs_probe_not_done(net, server, &ac); 217 } else { 218 if (!afs_fs_get_capabilities(net, server, &ac, key)) 219 afs_fs_probe_not_done(net, server, &ac); 220 } 221 222 afs_put_addrlist(ac.alist); 223} 224 225/* 226 * Wait for the first as-yet untried fileserver to respond. 227 */ 228int afs_wait_for_fs_probes(struct afs_server_list *slist, unsigned long untried) 229{ 230 struct wait_queue_entry *waits; 231 struct afs_server *server; 232 unsigned int rtt = UINT_MAX, rtt_s; 233 bool have_responders = false; 234 int pref = -1, i; 235 236 _enter("%u,%lx", slist->nr_servers, untried); 237 238 /* Only wait for servers that have a probe outstanding. */ 239 for (i = 0; i < slist->nr_servers; i++) { 240 if (test_bit(i, &untried)) { 241 server = slist->servers[i].server; 242 if (!atomic_read(&server->probe_outstanding)) 243 __clear_bit(i, &untried); 244 if (server->probe.responded) 245 have_responders = true; 246 } 247 } 248 if (have_responders || !untried) 249 return 0; 250 251 waits = kmalloc(array_size(slist->nr_servers, sizeof(*waits)), GFP_KERNEL); 252 if (!waits) 253 return -ENOMEM; 254 255 for (i = 0; i < slist->nr_servers; i++) { 256 if (test_bit(i, &untried)) { 257 server = slist->servers[i].server; 258 init_waitqueue_entry(&waits[i], current); 259 add_wait_queue(&server->probe_wq, &waits[i]); 260 } 261 } 262 263 for (;;) { 264 bool still_probing = false; 265 266 set_current_state(TASK_INTERRUPTIBLE); 267 for (i = 0; i < slist->nr_servers; i++) { 268 if (test_bit(i, &untried)) { 269 server = slist->servers[i].server; 270 if (server->probe.responded) 271 goto stop; 272 if (atomic_read(&server->probe_outstanding)) 273 still_probing = true; 274 } 275 } 276 277 if (!still_probing || signal_pending(current)) 278 goto stop; 279 schedule(); 280 } 281 282stop: 283 set_current_state(TASK_RUNNING); 284 285 for (i = 0; i < slist->nr_servers; i++) { 286 if (test_bit(i, &untried)) { 287 server = slist->servers[i].server; 288 rtt_s = READ_ONCE(server->rtt); 289 if (test_bit(AFS_SERVER_FL_RESPONDING, &server->flags) && 290 rtt_s < rtt) { 291 pref = i; 292 rtt = rtt_s; 293 } 294 295 remove_wait_queue(&server->probe_wq, &waits[i]); 296 } 297 } 298 299 kfree(waits); 300 301 if (pref == -1 && signal_pending(current)) 302 return -ERESTARTSYS; 303 304 if (pref >= 0) 305 slist->preferred = pref; 306 return 0; 307} 308 309/* 310 * Probe timer. We have an increment on fs_outstanding that we need to pass 311 * along to the work item. 312 */ 313void afs_fs_probe_timer(struct timer_list *timer) 314{ 315 struct afs_net *net = container_of(timer, struct afs_net, fs_probe_timer); 316 317 if (!net->live || !queue_work(afs_wq, &net->fs_prober)) 318 afs_dec_servers_outstanding(net); 319} 320 321/* 322 * Dispatch a probe to a server. 323 */ 324static void afs_dispatch_fs_probe(struct afs_net *net, struct afs_server *server, bool all) 325 __releases(&net->fs_lock) 326{ 327 struct key *key = NULL; 328 329 /* We remove it from the queues here - it will be added back to 330 * one of the queues on the completion of the probe. 331 */ 332 list_del_init(&server->probe_link); 333 334 afs_get_server(server, afs_server_trace_get_probe); 335 write_sequnlock(&net->fs_lock); 336 337 afs_fs_probe_fileserver(net, server, key, all); 338 afs_put_server(net, server, afs_server_trace_put_probe); 339} 340 341/* 342 * Probe a server immediately without waiting for its due time to come 343 * round. This is used when all of the addresses have been tried. 344 */ 345void afs_probe_fileserver(struct afs_net *net, struct afs_server *server) 346{ 347 write_seqlock(&net->fs_lock); 348 if (!list_empty(&server->probe_link)) 349 return afs_dispatch_fs_probe(net, server, true); 350 write_sequnlock(&net->fs_lock); 351} 352 353/* 354 * Probe dispatcher to regularly dispatch probes to keep NAT alive. 355 */ 356void afs_fs_probe_dispatcher(struct work_struct *work) 357{ 358 struct afs_net *net = container_of(work, struct afs_net, fs_prober); 359 struct afs_server *fast, *slow, *server; 360 unsigned long nowj, timer_at, poll_at; 361 bool first_pass = true, set_timer = false; 362 363 if (!net->live) { 364 afs_dec_servers_outstanding(net); 365 return; 366 } 367 368 _enter(""); 369 370 if (list_empty(&net->fs_probe_fast) && list_empty(&net->fs_probe_slow)) { 371 afs_dec_servers_outstanding(net); 372 _leave(" [none]"); 373 return; 374 } 375 376again: 377 write_seqlock(&net->fs_lock); 378 379 fast = slow = server = NULL; 380 nowj = jiffies; 381 timer_at = nowj + MAX_JIFFY_OFFSET; 382 383 if (!list_empty(&net->fs_probe_fast)) { 384 fast = list_first_entry(&net->fs_probe_fast, struct afs_server, probe_link); 385 poll_at = fast->probed_at + afs_fs_probe_fast_poll_interval; 386 if (time_before(nowj, poll_at)) { 387 timer_at = poll_at; 388 set_timer = true; 389 fast = NULL; 390 } 391 } 392 393 if (!list_empty(&net->fs_probe_slow)) { 394 slow = list_first_entry(&net->fs_probe_slow, struct afs_server, probe_link); 395 poll_at = slow->probed_at + afs_fs_probe_slow_poll_interval; 396 if (time_before(nowj, poll_at)) { 397 if (time_before(poll_at, timer_at)) 398 timer_at = poll_at; 399 set_timer = true; 400 slow = NULL; 401 } 402 } 403 404 server = fast ?: slow; 405 if (server) 406 _debug("probe %pU", &server->uuid); 407 408 if (server && (first_pass || !need_resched())) { 409 afs_dispatch_fs_probe(net, server, server == fast); 410 first_pass = false; 411 goto again; 412 } 413 414 write_sequnlock(&net->fs_lock); 415 416 if (server) { 417 if (!queue_work(afs_wq, &net->fs_prober)) 418 afs_dec_servers_outstanding(net); 419 _leave(" [requeue]"); 420 } else if (set_timer) { 421 if (timer_reduce(&net->fs_probe_timer, timer_at)) 422 afs_dec_servers_outstanding(net); 423 _leave(" [timer]"); 424 } else { 425 afs_dec_servers_outstanding(net); 426 _leave(" [quiesce]"); 427 } 428} 429 430/* 431 * Wait for a probe on a particular fileserver to complete for 2s. 432 */ 433int afs_wait_for_one_fs_probe(struct afs_server *server, bool is_intr) 434{ 435 struct wait_queue_entry wait; 436 unsigned long timo = 2 * HZ; 437 438 if (atomic_read(&server->probe_outstanding) == 0) 439 goto dont_wait; 440 441 init_wait_entry(&wait, 0); 442 for (;;) { 443 prepare_to_wait_event(&server->probe_wq, &wait, 444 is_intr ? TASK_INTERRUPTIBLE : TASK_UNINTERRUPTIBLE); 445 if (timo == 0 || 446 server->probe.responded || 447 atomic_read(&server->probe_outstanding) == 0 || 448 (is_intr && signal_pending(current))) 449 break; 450 timo = schedule_timeout(timo); 451 } 452 453 finish_wait(&server->probe_wq, &wait); 454 455dont_wait: 456 if (server->probe.responded) 457 return 0; 458 if (is_intr && signal_pending(current)) 459 return -ERESTARTSYS; 460 if (timo == 0) 461 return -ETIME; 462 return -EDESTADDRREQ; 463} 464 465/* 466 * Clean up the probing when the namespace is killed off. 467 */ 468void afs_fs_probe_cleanup(struct afs_net *net) 469{ 470 if (del_timer_sync(&net->fs_probe_timer)) 471 afs_dec_servers_outstanding(net); 472} 473