162306a36Sopenharmony_ci// SPDX-License-Identifier: GPL-2.0-or-later 262306a36Sopenharmony_ci/* Handle vlserver selection and rotation. 362306a36Sopenharmony_ci * 462306a36Sopenharmony_ci * Copyright (C) 2018 Red Hat, Inc. All Rights Reserved. 562306a36Sopenharmony_ci * Written by David Howells (dhowells@redhat.com) 662306a36Sopenharmony_ci */ 762306a36Sopenharmony_ci 862306a36Sopenharmony_ci#include <linux/kernel.h> 962306a36Sopenharmony_ci#include <linux/sched.h> 1062306a36Sopenharmony_ci#include <linux/sched/signal.h> 1162306a36Sopenharmony_ci#include "internal.h" 1262306a36Sopenharmony_ci#include "afs_vl.h" 1362306a36Sopenharmony_ci 1462306a36Sopenharmony_ci/* 1562306a36Sopenharmony_ci * Begin an operation on a volume location server. 1662306a36Sopenharmony_ci */ 1762306a36Sopenharmony_cibool afs_begin_vlserver_operation(struct afs_vl_cursor *vc, struct afs_cell *cell, 1862306a36Sopenharmony_ci struct key *key) 1962306a36Sopenharmony_ci{ 2062306a36Sopenharmony_ci memset(vc, 0, sizeof(*vc)); 2162306a36Sopenharmony_ci vc->cell = cell; 2262306a36Sopenharmony_ci vc->key = key; 2362306a36Sopenharmony_ci vc->error = -EDESTADDRREQ; 2462306a36Sopenharmony_ci vc->ac.error = SHRT_MAX; 2562306a36Sopenharmony_ci 2662306a36Sopenharmony_ci if (signal_pending(current)) { 2762306a36Sopenharmony_ci vc->error = -EINTR; 2862306a36Sopenharmony_ci vc->flags |= AFS_VL_CURSOR_STOP; 2962306a36Sopenharmony_ci return false; 3062306a36Sopenharmony_ci } 3162306a36Sopenharmony_ci 3262306a36Sopenharmony_ci return true; 3362306a36Sopenharmony_ci} 3462306a36Sopenharmony_ci 3562306a36Sopenharmony_ci/* 3662306a36Sopenharmony_ci * Begin iteration through a server list, starting with the last used server if 3762306a36Sopenharmony_ci * possible, or the last recorded good server if not. 3862306a36Sopenharmony_ci */ 3962306a36Sopenharmony_cistatic bool afs_start_vl_iteration(struct afs_vl_cursor *vc) 4062306a36Sopenharmony_ci{ 4162306a36Sopenharmony_ci struct afs_cell *cell = vc->cell; 4262306a36Sopenharmony_ci unsigned int dns_lookup_count; 4362306a36Sopenharmony_ci 4462306a36Sopenharmony_ci if (cell->dns_source == DNS_RECORD_UNAVAILABLE || 4562306a36Sopenharmony_ci cell->dns_expiry <= ktime_get_real_seconds()) { 4662306a36Sopenharmony_ci dns_lookup_count = smp_load_acquire(&cell->dns_lookup_count); 4762306a36Sopenharmony_ci set_bit(AFS_CELL_FL_DO_LOOKUP, &cell->flags); 4862306a36Sopenharmony_ci afs_queue_cell(cell, afs_cell_trace_get_queue_dns); 4962306a36Sopenharmony_ci 5062306a36Sopenharmony_ci if (cell->dns_source == DNS_RECORD_UNAVAILABLE) { 5162306a36Sopenharmony_ci if (wait_var_event_interruptible( 5262306a36Sopenharmony_ci &cell->dns_lookup_count, 5362306a36Sopenharmony_ci smp_load_acquire(&cell->dns_lookup_count) 5462306a36Sopenharmony_ci != dns_lookup_count) < 0) { 5562306a36Sopenharmony_ci vc->error = -ERESTARTSYS; 5662306a36Sopenharmony_ci return false; 5762306a36Sopenharmony_ci } 5862306a36Sopenharmony_ci } 5962306a36Sopenharmony_ci 6062306a36Sopenharmony_ci /* Status load is ordered after lookup counter load */ 6162306a36Sopenharmony_ci if (cell->dns_status == DNS_LOOKUP_GOT_NOT_FOUND) { 6262306a36Sopenharmony_ci pr_warn("No record of cell %s\n", cell->name); 6362306a36Sopenharmony_ci vc->error = -ENOENT; 6462306a36Sopenharmony_ci return false; 6562306a36Sopenharmony_ci } 6662306a36Sopenharmony_ci 6762306a36Sopenharmony_ci if (cell->dns_source == DNS_RECORD_UNAVAILABLE) { 6862306a36Sopenharmony_ci vc->error = -EDESTADDRREQ; 6962306a36Sopenharmony_ci return false; 7062306a36Sopenharmony_ci } 7162306a36Sopenharmony_ci } 7262306a36Sopenharmony_ci 7362306a36Sopenharmony_ci read_lock(&cell->vl_servers_lock); 7462306a36Sopenharmony_ci vc->server_list = afs_get_vlserverlist( 7562306a36Sopenharmony_ci rcu_dereference_protected(cell->vl_servers, 7662306a36Sopenharmony_ci lockdep_is_held(&cell->vl_servers_lock))); 7762306a36Sopenharmony_ci read_unlock(&cell->vl_servers_lock); 7862306a36Sopenharmony_ci if (!vc->server_list->nr_servers) 7962306a36Sopenharmony_ci return false; 8062306a36Sopenharmony_ci 8162306a36Sopenharmony_ci vc->untried = (1UL << vc->server_list->nr_servers) - 1; 8262306a36Sopenharmony_ci vc->index = -1; 8362306a36Sopenharmony_ci return true; 8462306a36Sopenharmony_ci} 8562306a36Sopenharmony_ci 8662306a36Sopenharmony_ci/* 8762306a36Sopenharmony_ci * Select the vlserver to use. May be called multiple times to rotate 8862306a36Sopenharmony_ci * through the vlservers. 8962306a36Sopenharmony_ci */ 9062306a36Sopenharmony_cibool afs_select_vlserver(struct afs_vl_cursor *vc) 9162306a36Sopenharmony_ci{ 9262306a36Sopenharmony_ci struct afs_addr_list *alist; 9362306a36Sopenharmony_ci struct afs_vlserver *vlserver; 9462306a36Sopenharmony_ci struct afs_error e; 9562306a36Sopenharmony_ci u32 rtt; 9662306a36Sopenharmony_ci int error = vc->ac.error, i; 9762306a36Sopenharmony_ci 9862306a36Sopenharmony_ci _enter("%lx[%d],%lx[%d],%d,%d", 9962306a36Sopenharmony_ci vc->untried, vc->index, 10062306a36Sopenharmony_ci vc->ac.tried, vc->ac.index, 10162306a36Sopenharmony_ci error, vc->ac.abort_code); 10262306a36Sopenharmony_ci 10362306a36Sopenharmony_ci if (vc->flags & AFS_VL_CURSOR_STOP) { 10462306a36Sopenharmony_ci _leave(" = f [stopped]"); 10562306a36Sopenharmony_ci return false; 10662306a36Sopenharmony_ci } 10762306a36Sopenharmony_ci 10862306a36Sopenharmony_ci vc->nr_iterations++; 10962306a36Sopenharmony_ci 11062306a36Sopenharmony_ci /* Evaluate the result of the previous operation, if there was one. */ 11162306a36Sopenharmony_ci switch (error) { 11262306a36Sopenharmony_ci case SHRT_MAX: 11362306a36Sopenharmony_ci goto start; 11462306a36Sopenharmony_ci 11562306a36Sopenharmony_ci default: 11662306a36Sopenharmony_ci case 0: 11762306a36Sopenharmony_ci /* Success or local failure. Stop. */ 11862306a36Sopenharmony_ci vc->error = error; 11962306a36Sopenharmony_ci vc->flags |= AFS_VL_CURSOR_STOP; 12062306a36Sopenharmony_ci _leave(" = f [okay/local %d]", vc->ac.error); 12162306a36Sopenharmony_ci return false; 12262306a36Sopenharmony_ci 12362306a36Sopenharmony_ci case -ECONNABORTED: 12462306a36Sopenharmony_ci /* The far side rejected the operation on some grounds. This 12562306a36Sopenharmony_ci * might involve the server being busy or the volume having been moved. 12662306a36Sopenharmony_ci */ 12762306a36Sopenharmony_ci switch (vc->ac.abort_code) { 12862306a36Sopenharmony_ci case AFSVL_IO: 12962306a36Sopenharmony_ci case AFSVL_BADVOLOPER: 13062306a36Sopenharmony_ci case AFSVL_NOMEM: 13162306a36Sopenharmony_ci /* The server went weird. */ 13262306a36Sopenharmony_ci vc->error = -EREMOTEIO; 13362306a36Sopenharmony_ci //write_lock(&vc->cell->vl_servers_lock); 13462306a36Sopenharmony_ci //vc->server_list->weird_mask |= 1 << vc->index; 13562306a36Sopenharmony_ci //write_unlock(&vc->cell->vl_servers_lock); 13662306a36Sopenharmony_ci goto next_server; 13762306a36Sopenharmony_ci 13862306a36Sopenharmony_ci default: 13962306a36Sopenharmony_ci vc->error = afs_abort_to_error(vc->ac.abort_code); 14062306a36Sopenharmony_ci goto failed; 14162306a36Sopenharmony_ci } 14262306a36Sopenharmony_ci 14362306a36Sopenharmony_ci case -ERFKILL: 14462306a36Sopenharmony_ci case -EADDRNOTAVAIL: 14562306a36Sopenharmony_ci case -ENETUNREACH: 14662306a36Sopenharmony_ci case -EHOSTUNREACH: 14762306a36Sopenharmony_ci case -EHOSTDOWN: 14862306a36Sopenharmony_ci case -ECONNREFUSED: 14962306a36Sopenharmony_ci case -ETIMEDOUT: 15062306a36Sopenharmony_ci case -ETIME: 15162306a36Sopenharmony_ci _debug("no conn %d", error); 15262306a36Sopenharmony_ci vc->error = error; 15362306a36Sopenharmony_ci goto iterate_address; 15462306a36Sopenharmony_ci 15562306a36Sopenharmony_ci case -ECONNRESET: 15662306a36Sopenharmony_ci _debug("call reset"); 15762306a36Sopenharmony_ci vc->error = error; 15862306a36Sopenharmony_ci vc->flags |= AFS_VL_CURSOR_RETRY; 15962306a36Sopenharmony_ci goto next_server; 16062306a36Sopenharmony_ci 16162306a36Sopenharmony_ci case -EOPNOTSUPP: 16262306a36Sopenharmony_ci _debug("notsupp"); 16362306a36Sopenharmony_ci goto next_server; 16462306a36Sopenharmony_ci } 16562306a36Sopenharmony_ci 16662306a36Sopenharmony_cirestart_from_beginning: 16762306a36Sopenharmony_ci _debug("restart"); 16862306a36Sopenharmony_ci afs_end_cursor(&vc->ac); 16962306a36Sopenharmony_ci afs_put_vlserverlist(vc->cell->net, vc->server_list); 17062306a36Sopenharmony_ci vc->server_list = NULL; 17162306a36Sopenharmony_ci if (vc->flags & AFS_VL_CURSOR_RETRIED) 17262306a36Sopenharmony_ci goto failed; 17362306a36Sopenharmony_ci vc->flags |= AFS_VL_CURSOR_RETRIED; 17462306a36Sopenharmony_cistart: 17562306a36Sopenharmony_ci _debug("start"); 17662306a36Sopenharmony_ci 17762306a36Sopenharmony_ci if (!afs_start_vl_iteration(vc)) 17862306a36Sopenharmony_ci goto failed; 17962306a36Sopenharmony_ci 18062306a36Sopenharmony_ci error = afs_send_vl_probes(vc->cell->net, vc->key, vc->server_list); 18162306a36Sopenharmony_ci if (error < 0) 18262306a36Sopenharmony_ci goto failed_set_error; 18362306a36Sopenharmony_ci 18462306a36Sopenharmony_cipick_server: 18562306a36Sopenharmony_ci _debug("pick [%lx]", vc->untried); 18662306a36Sopenharmony_ci 18762306a36Sopenharmony_ci error = afs_wait_for_vl_probes(vc->server_list, vc->untried); 18862306a36Sopenharmony_ci if (error < 0) 18962306a36Sopenharmony_ci goto failed_set_error; 19062306a36Sopenharmony_ci 19162306a36Sopenharmony_ci /* Pick the untried server with the lowest RTT. */ 19262306a36Sopenharmony_ci vc->index = vc->server_list->preferred; 19362306a36Sopenharmony_ci if (test_bit(vc->index, &vc->untried)) 19462306a36Sopenharmony_ci goto selected_server; 19562306a36Sopenharmony_ci 19662306a36Sopenharmony_ci vc->index = -1; 19762306a36Sopenharmony_ci rtt = U32_MAX; 19862306a36Sopenharmony_ci for (i = 0; i < vc->server_list->nr_servers; i++) { 19962306a36Sopenharmony_ci struct afs_vlserver *s = vc->server_list->servers[i].server; 20062306a36Sopenharmony_ci 20162306a36Sopenharmony_ci if (!test_bit(i, &vc->untried) || 20262306a36Sopenharmony_ci !test_bit(AFS_VLSERVER_FL_RESPONDING, &s->flags)) 20362306a36Sopenharmony_ci continue; 20462306a36Sopenharmony_ci if (s->probe.rtt < rtt) { 20562306a36Sopenharmony_ci vc->index = i; 20662306a36Sopenharmony_ci rtt = s->probe.rtt; 20762306a36Sopenharmony_ci } 20862306a36Sopenharmony_ci } 20962306a36Sopenharmony_ci 21062306a36Sopenharmony_ci if (vc->index == -1) 21162306a36Sopenharmony_ci goto no_more_servers; 21262306a36Sopenharmony_ci 21362306a36Sopenharmony_ciselected_server: 21462306a36Sopenharmony_ci _debug("use %d", vc->index); 21562306a36Sopenharmony_ci __clear_bit(vc->index, &vc->untried); 21662306a36Sopenharmony_ci 21762306a36Sopenharmony_ci /* We're starting on a different vlserver from the list. We need to 21862306a36Sopenharmony_ci * check it, find its address list and probe its capabilities before we 21962306a36Sopenharmony_ci * use it. 22062306a36Sopenharmony_ci */ 22162306a36Sopenharmony_ci ASSERTCMP(vc->ac.alist, ==, NULL); 22262306a36Sopenharmony_ci vlserver = vc->server_list->servers[vc->index].server; 22362306a36Sopenharmony_ci vc->server = vlserver; 22462306a36Sopenharmony_ci 22562306a36Sopenharmony_ci _debug("USING VLSERVER: %s", vlserver->name); 22662306a36Sopenharmony_ci 22762306a36Sopenharmony_ci read_lock(&vlserver->lock); 22862306a36Sopenharmony_ci alist = rcu_dereference_protected(vlserver->addresses, 22962306a36Sopenharmony_ci lockdep_is_held(&vlserver->lock)); 23062306a36Sopenharmony_ci afs_get_addrlist(alist); 23162306a36Sopenharmony_ci read_unlock(&vlserver->lock); 23262306a36Sopenharmony_ci 23362306a36Sopenharmony_ci memset(&vc->ac, 0, sizeof(vc->ac)); 23462306a36Sopenharmony_ci 23562306a36Sopenharmony_ci if (!vc->ac.alist) 23662306a36Sopenharmony_ci vc->ac.alist = alist; 23762306a36Sopenharmony_ci else 23862306a36Sopenharmony_ci afs_put_addrlist(alist); 23962306a36Sopenharmony_ci 24062306a36Sopenharmony_ci vc->ac.index = -1; 24162306a36Sopenharmony_ci 24262306a36Sopenharmony_ciiterate_address: 24362306a36Sopenharmony_ci ASSERT(vc->ac.alist); 24462306a36Sopenharmony_ci /* Iterate over the current server's address list to try and find an 24562306a36Sopenharmony_ci * address on which it will respond to us. 24662306a36Sopenharmony_ci */ 24762306a36Sopenharmony_ci if (!afs_iterate_addresses(&vc->ac)) 24862306a36Sopenharmony_ci goto next_server; 24962306a36Sopenharmony_ci 25062306a36Sopenharmony_ci _debug("VL address %d/%d", vc->ac.index, vc->ac.alist->nr_addrs); 25162306a36Sopenharmony_ci 25262306a36Sopenharmony_ci _leave(" = t %pISpc", &vc->ac.alist->addrs[vc->ac.index].transport); 25362306a36Sopenharmony_ci return true; 25462306a36Sopenharmony_ci 25562306a36Sopenharmony_cinext_server: 25662306a36Sopenharmony_ci _debug("next"); 25762306a36Sopenharmony_ci afs_end_cursor(&vc->ac); 25862306a36Sopenharmony_ci goto pick_server; 25962306a36Sopenharmony_ci 26062306a36Sopenharmony_cino_more_servers: 26162306a36Sopenharmony_ci /* That's all the servers poked to no good effect. Try again if some 26262306a36Sopenharmony_ci * of them were busy. 26362306a36Sopenharmony_ci */ 26462306a36Sopenharmony_ci if (vc->flags & AFS_VL_CURSOR_RETRY) 26562306a36Sopenharmony_ci goto restart_from_beginning; 26662306a36Sopenharmony_ci 26762306a36Sopenharmony_ci e.error = -EDESTADDRREQ; 26862306a36Sopenharmony_ci e.responded = false; 26962306a36Sopenharmony_ci for (i = 0; i < vc->server_list->nr_servers; i++) { 27062306a36Sopenharmony_ci struct afs_vlserver *s = vc->server_list->servers[i].server; 27162306a36Sopenharmony_ci 27262306a36Sopenharmony_ci if (test_bit(AFS_VLSERVER_FL_RESPONDING, &s->flags)) 27362306a36Sopenharmony_ci e.responded = true; 27462306a36Sopenharmony_ci afs_prioritise_error(&e, READ_ONCE(s->probe.error), 27562306a36Sopenharmony_ci s->probe.abort_code); 27662306a36Sopenharmony_ci } 27762306a36Sopenharmony_ci 27862306a36Sopenharmony_ci error = e.error; 27962306a36Sopenharmony_ci 28062306a36Sopenharmony_cifailed_set_error: 28162306a36Sopenharmony_ci vc->error = error; 28262306a36Sopenharmony_cifailed: 28362306a36Sopenharmony_ci vc->flags |= AFS_VL_CURSOR_STOP; 28462306a36Sopenharmony_ci afs_end_cursor(&vc->ac); 28562306a36Sopenharmony_ci _leave(" = f [failed %d]", vc->error); 28662306a36Sopenharmony_ci return false; 28762306a36Sopenharmony_ci} 28862306a36Sopenharmony_ci 28962306a36Sopenharmony_ci/* 29062306a36Sopenharmony_ci * Dump cursor state in the case of the error being EDESTADDRREQ. 29162306a36Sopenharmony_ci */ 29262306a36Sopenharmony_cistatic void afs_vl_dump_edestaddrreq(const struct afs_vl_cursor *vc) 29362306a36Sopenharmony_ci{ 29462306a36Sopenharmony_ci struct afs_cell *cell = vc->cell; 29562306a36Sopenharmony_ci static int count; 29662306a36Sopenharmony_ci int i; 29762306a36Sopenharmony_ci 29862306a36Sopenharmony_ci if (!IS_ENABLED(CONFIG_AFS_DEBUG_CURSOR) || count > 3) 29962306a36Sopenharmony_ci return; 30062306a36Sopenharmony_ci count++; 30162306a36Sopenharmony_ci 30262306a36Sopenharmony_ci rcu_read_lock(); 30362306a36Sopenharmony_ci pr_notice("EDESTADDR occurred\n"); 30462306a36Sopenharmony_ci pr_notice("CELL: %s err=%d\n", cell->name, cell->error); 30562306a36Sopenharmony_ci pr_notice("DNS: src=%u st=%u lc=%x\n", 30662306a36Sopenharmony_ci cell->dns_source, cell->dns_status, cell->dns_lookup_count); 30762306a36Sopenharmony_ci pr_notice("VC: ut=%lx ix=%u ni=%hu fl=%hx err=%hd\n", 30862306a36Sopenharmony_ci vc->untried, vc->index, vc->nr_iterations, vc->flags, vc->error); 30962306a36Sopenharmony_ci 31062306a36Sopenharmony_ci if (vc->server_list) { 31162306a36Sopenharmony_ci const struct afs_vlserver_list *sl = vc->server_list; 31262306a36Sopenharmony_ci pr_notice("VC: SL nr=%u ix=%u\n", 31362306a36Sopenharmony_ci sl->nr_servers, sl->index); 31462306a36Sopenharmony_ci for (i = 0; i < sl->nr_servers; i++) { 31562306a36Sopenharmony_ci const struct afs_vlserver *s = sl->servers[i].server; 31662306a36Sopenharmony_ci pr_notice("VC: server %s+%hu fl=%lx E=%hd\n", 31762306a36Sopenharmony_ci s->name, s->port, s->flags, s->probe.error); 31862306a36Sopenharmony_ci if (s->addresses) { 31962306a36Sopenharmony_ci const struct afs_addr_list *a = 32062306a36Sopenharmony_ci rcu_dereference(s->addresses); 32162306a36Sopenharmony_ci pr_notice("VC: - nr=%u/%u/%u pf=%u\n", 32262306a36Sopenharmony_ci a->nr_ipv4, a->nr_addrs, a->max_addrs, 32362306a36Sopenharmony_ci a->preferred); 32462306a36Sopenharmony_ci pr_notice("VC: - R=%lx F=%lx\n", 32562306a36Sopenharmony_ci a->responded, a->failed); 32662306a36Sopenharmony_ci if (a == vc->ac.alist) 32762306a36Sopenharmony_ci pr_notice("VC: - current\n"); 32862306a36Sopenharmony_ci } 32962306a36Sopenharmony_ci } 33062306a36Sopenharmony_ci } 33162306a36Sopenharmony_ci 33262306a36Sopenharmony_ci pr_notice("AC: t=%lx ax=%u ac=%d er=%d r=%u ni=%u\n", 33362306a36Sopenharmony_ci vc->ac.tried, vc->ac.index, vc->ac.abort_code, vc->ac.error, 33462306a36Sopenharmony_ci vc->ac.responded, vc->ac.nr_iterations); 33562306a36Sopenharmony_ci rcu_read_unlock(); 33662306a36Sopenharmony_ci} 33762306a36Sopenharmony_ci 33862306a36Sopenharmony_ci/* 33962306a36Sopenharmony_ci * Tidy up a volume location server cursor and unlock the vnode. 34062306a36Sopenharmony_ci */ 34162306a36Sopenharmony_ciint afs_end_vlserver_operation(struct afs_vl_cursor *vc) 34262306a36Sopenharmony_ci{ 34362306a36Sopenharmony_ci struct afs_net *net = vc->cell->net; 34462306a36Sopenharmony_ci 34562306a36Sopenharmony_ci if (vc->error == -EDESTADDRREQ || 34662306a36Sopenharmony_ci vc->error == -EADDRNOTAVAIL || 34762306a36Sopenharmony_ci vc->error == -ENETUNREACH || 34862306a36Sopenharmony_ci vc->error == -EHOSTUNREACH) 34962306a36Sopenharmony_ci afs_vl_dump_edestaddrreq(vc); 35062306a36Sopenharmony_ci 35162306a36Sopenharmony_ci afs_end_cursor(&vc->ac); 35262306a36Sopenharmony_ci afs_put_vlserverlist(net, vc->server_list); 35362306a36Sopenharmony_ci 35462306a36Sopenharmony_ci if (vc->error == -ECONNABORTED) 35562306a36Sopenharmony_ci vc->error = afs_abort_to_error(vc->ac.abort_code); 35662306a36Sopenharmony_ci 35762306a36Sopenharmony_ci return vc->error; 35862306a36Sopenharmony_ci} 359