18c2ecf20Sopenharmony_ci// SPDX-License-Identifier: GPL-2.0-only 28c2ecf20Sopenharmony_ci/****************************************************************************** 38c2ecf20Sopenharmony_ci******************************************************************************* 48c2ecf20Sopenharmony_ci** 58c2ecf20Sopenharmony_ci** Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved. 68c2ecf20Sopenharmony_ci** Copyright (C) 2004-2005 Red Hat, Inc. All rights reserved. 78c2ecf20Sopenharmony_ci** 88c2ecf20Sopenharmony_ci** 98c2ecf20Sopenharmony_ci******************************************************************************* 108c2ecf20Sopenharmony_ci******************************************************************************/ 118c2ecf20Sopenharmony_ci 128c2ecf20Sopenharmony_ci#include "dlm_internal.h" 138c2ecf20Sopenharmony_ci#include "lockspace.h" 148c2ecf20Sopenharmony_ci#include "member.h" 158c2ecf20Sopenharmony_ci#include "lowcomms.h" 168c2ecf20Sopenharmony_ci#include "rcom.h" 178c2ecf20Sopenharmony_ci#include "config.h" 188c2ecf20Sopenharmony_ci#include "memory.h" 198c2ecf20Sopenharmony_ci#include "recover.h" 208c2ecf20Sopenharmony_ci#include "util.h" 218c2ecf20Sopenharmony_ci#include "lock.h" 228c2ecf20Sopenharmony_ci#include "dir.h" 238c2ecf20Sopenharmony_ci 248c2ecf20Sopenharmony_ci/* 258c2ecf20Sopenharmony_ci * We use the upper 16 bits of the hash value to select the directory node. 268c2ecf20Sopenharmony_ci * Low bits are used for distribution of rsb's among hash buckets on each node. 278c2ecf20Sopenharmony_ci * 288c2ecf20Sopenharmony_ci * To give the exact range wanted (0 to num_nodes-1), we apply a modulus of 298c2ecf20Sopenharmony_ci * num_nodes to the hash value. This value in the desired range is used as an 308c2ecf20Sopenharmony_ci * offset into the sorted list of nodeid's to give the particular nodeid. 318c2ecf20Sopenharmony_ci */ 328c2ecf20Sopenharmony_ci 338c2ecf20Sopenharmony_ciint dlm_hash2nodeid(struct dlm_ls *ls, uint32_t hash) 348c2ecf20Sopenharmony_ci{ 358c2ecf20Sopenharmony_ci uint32_t node; 368c2ecf20Sopenharmony_ci 378c2ecf20Sopenharmony_ci if (ls->ls_num_nodes == 1) 388c2ecf20Sopenharmony_ci return dlm_our_nodeid(); 398c2ecf20Sopenharmony_ci else { 408c2ecf20Sopenharmony_ci node = (hash >> 16) % ls->ls_total_weight; 418c2ecf20Sopenharmony_ci return ls->ls_node_array[node]; 428c2ecf20Sopenharmony_ci } 438c2ecf20Sopenharmony_ci} 448c2ecf20Sopenharmony_ci 458c2ecf20Sopenharmony_ciint dlm_dir_nodeid(struct dlm_rsb *r) 468c2ecf20Sopenharmony_ci{ 478c2ecf20Sopenharmony_ci return r->res_dir_nodeid; 488c2ecf20Sopenharmony_ci} 498c2ecf20Sopenharmony_ci 508c2ecf20Sopenharmony_civoid dlm_recover_dir_nodeid(struct dlm_ls *ls) 518c2ecf20Sopenharmony_ci{ 528c2ecf20Sopenharmony_ci struct dlm_rsb *r; 538c2ecf20Sopenharmony_ci 548c2ecf20Sopenharmony_ci down_read(&ls->ls_root_sem); 558c2ecf20Sopenharmony_ci list_for_each_entry(r, &ls->ls_root_list, res_root_list) { 568c2ecf20Sopenharmony_ci r->res_dir_nodeid = dlm_hash2nodeid(ls, r->res_hash); 578c2ecf20Sopenharmony_ci } 588c2ecf20Sopenharmony_ci up_read(&ls->ls_root_sem); 598c2ecf20Sopenharmony_ci} 608c2ecf20Sopenharmony_ci 618c2ecf20Sopenharmony_ciint dlm_recover_directory(struct dlm_ls *ls) 628c2ecf20Sopenharmony_ci{ 638c2ecf20Sopenharmony_ci struct dlm_member *memb; 648c2ecf20Sopenharmony_ci char *b, *last_name = NULL; 658c2ecf20Sopenharmony_ci int error = -ENOMEM, last_len, nodeid, result; 668c2ecf20Sopenharmony_ci uint16_t namelen; 678c2ecf20Sopenharmony_ci unsigned int count = 0, count_match = 0, count_bad = 0, count_add = 0; 688c2ecf20Sopenharmony_ci 698c2ecf20Sopenharmony_ci log_rinfo(ls, "dlm_recover_directory"); 708c2ecf20Sopenharmony_ci 718c2ecf20Sopenharmony_ci if (dlm_no_directory(ls)) 728c2ecf20Sopenharmony_ci goto out_status; 738c2ecf20Sopenharmony_ci 748c2ecf20Sopenharmony_ci last_name = kmalloc(DLM_RESNAME_MAXLEN, GFP_NOFS); 758c2ecf20Sopenharmony_ci if (!last_name) 768c2ecf20Sopenharmony_ci goto out; 778c2ecf20Sopenharmony_ci 788c2ecf20Sopenharmony_ci list_for_each_entry(memb, &ls->ls_nodes, list) { 798c2ecf20Sopenharmony_ci if (memb->nodeid == dlm_our_nodeid()) 808c2ecf20Sopenharmony_ci continue; 818c2ecf20Sopenharmony_ci 828c2ecf20Sopenharmony_ci memset(last_name, 0, DLM_RESNAME_MAXLEN); 838c2ecf20Sopenharmony_ci last_len = 0; 848c2ecf20Sopenharmony_ci 858c2ecf20Sopenharmony_ci for (;;) { 868c2ecf20Sopenharmony_ci int left; 878c2ecf20Sopenharmony_ci error = dlm_recovery_stopped(ls); 888c2ecf20Sopenharmony_ci if (error) 898c2ecf20Sopenharmony_ci goto out_free; 908c2ecf20Sopenharmony_ci 918c2ecf20Sopenharmony_ci error = dlm_rcom_names(ls, memb->nodeid, 928c2ecf20Sopenharmony_ci last_name, last_len); 938c2ecf20Sopenharmony_ci if (error) 948c2ecf20Sopenharmony_ci goto out_free; 958c2ecf20Sopenharmony_ci 968c2ecf20Sopenharmony_ci cond_resched(); 978c2ecf20Sopenharmony_ci 988c2ecf20Sopenharmony_ci /* 998c2ecf20Sopenharmony_ci * pick namelen/name pairs out of received buffer 1008c2ecf20Sopenharmony_ci */ 1018c2ecf20Sopenharmony_ci 1028c2ecf20Sopenharmony_ci b = ls->ls_recover_buf->rc_buf; 1038c2ecf20Sopenharmony_ci left = ls->ls_recover_buf->rc_header.h_length; 1048c2ecf20Sopenharmony_ci left -= sizeof(struct dlm_rcom); 1058c2ecf20Sopenharmony_ci 1068c2ecf20Sopenharmony_ci for (;;) { 1078c2ecf20Sopenharmony_ci __be16 v; 1088c2ecf20Sopenharmony_ci 1098c2ecf20Sopenharmony_ci error = -EINVAL; 1108c2ecf20Sopenharmony_ci if (left < sizeof(__be16)) 1118c2ecf20Sopenharmony_ci goto out_free; 1128c2ecf20Sopenharmony_ci 1138c2ecf20Sopenharmony_ci memcpy(&v, b, sizeof(__be16)); 1148c2ecf20Sopenharmony_ci namelen = be16_to_cpu(v); 1158c2ecf20Sopenharmony_ci b += sizeof(__be16); 1168c2ecf20Sopenharmony_ci left -= sizeof(__be16); 1178c2ecf20Sopenharmony_ci 1188c2ecf20Sopenharmony_ci /* namelen of 0xFFFFF marks end of names for 1198c2ecf20Sopenharmony_ci this node; namelen of 0 marks end of the 1208c2ecf20Sopenharmony_ci buffer */ 1218c2ecf20Sopenharmony_ci 1228c2ecf20Sopenharmony_ci if (namelen == 0xFFFF) 1238c2ecf20Sopenharmony_ci goto done; 1248c2ecf20Sopenharmony_ci if (!namelen) 1258c2ecf20Sopenharmony_ci break; 1268c2ecf20Sopenharmony_ci 1278c2ecf20Sopenharmony_ci if (namelen > left) 1288c2ecf20Sopenharmony_ci goto out_free; 1298c2ecf20Sopenharmony_ci 1308c2ecf20Sopenharmony_ci if (namelen > DLM_RESNAME_MAXLEN) 1318c2ecf20Sopenharmony_ci goto out_free; 1328c2ecf20Sopenharmony_ci 1338c2ecf20Sopenharmony_ci error = dlm_master_lookup(ls, memb->nodeid, 1348c2ecf20Sopenharmony_ci b, namelen, 1358c2ecf20Sopenharmony_ci DLM_LU_RECOVER_DIR, 1368c2ecf20Sopenharmony_ci &nodeid, &result); 1378c2ecf20Sopenharmony_ci if (error) { 1388c2ecf20Sopenharmony_ci log_error(ls, "recover_dir lookup %d", 1398c2ecf20Sopenharmony_ci error); 1408c2ecf20Sopenharmony_ci goto out_free; 1418c2ecf20Sopenharmony_ci } 1428c2ecf20Sopenharmony_ci 1438c2ecf20Sopenharmony_ci /* The name was found in rsbtbl, but the 1448c2ecf20Sopenharmony_ci * master nodeid is different from 1458c2ecf20Sopenharmony_ci * memb->nodeid which says it is the master. 1468c2ecf20Sopenharmony_ci * This should not happen. */ 1478c2ecf20Sopenharmony_ci 1488c2ecf20Sopenharmony_ci if (result == DLM_LU_MATCH && 1498c2ecf20Sopenharmony_ci nodeid != memb->nodeid) { 1508c2ecf20Sopenharmony_ci count_bad++; 1518c2ecf20Sopenharmony_ci log_error(ls, "recover_dir lookup %d " 1528c2ecf20Sopenharmony_ci "nodeid %d memb %d bad %u", 1538c2ecf20Sopenharmony_ci result, nodeid, memb->nodeid, 1548c2ecf20Sopenharmony_ci count_bad); 1558c2ecf20Sopenharmony_ci print_hex_dump_bytes("dlm_recover_dir ", 1568c2ecf20Sopenharmony_ci DUMP_PREFIX_NONE, 1578c2ecf20Sopenharmony_ci b, namelen); 1588c2ecf20Sopenharmony_ci } 1598c2ecf20Sopenharmony_ci 1608c2ecf20Sopenharmony_ci /* The name was found in rsbtbl, and the 1618c2ecf20Sopenharmony_ci * master nodeid matches memb->nodeid. */ 1628c2ecf20Sopenharmony_ci 1638c2ecf20Sopenharmony_ci if (result == DLM_LU_MATCH && 1648c2ecf20Sopenharmony_ci nodeid == memb->nodeid) { 1658c2ecf20Sopenharmony_ci count_match++; 1668c2ecf20Sopenharmony_ci } 1678c2ecf20Sopenharmony_ci 1688c2ecf20Sopenharmony_ci /* The name was not found in rsbtbl and was 1698c2ecf20Sopenharmony_ci * added with memb->nodeid as the master. */ 1708c2ecf20Sopenharmony_ci 1718c2ecf20Sopenharmony_ci if (result == DLM_LU_ADD) { 1728c2ecf20Sopenharmony_ci count_add++; 1738c2ecf20Sopenharmony_ci } 1748c2ecf20Sopenharmony_ci 1758c2ecf20Sopenharmony_ci last_len = namelen; 1768c2ecf20Sopenharmony_ci memcpy(last_name, b, namelen); 1778c2ecf20Sopenharmony_ci b += namelen; 1788c2ecf20Sopenharmony_ci left -= namelen; 1798c2ecf20Sopenharmony_ci count++; 1808c2ecf20Sopenharmony_ci } 1818c2ecf20Sopenharmony_ci } 1828c2ecf20Sopenharmony_ci done: 1838c2ecf20Sopenharmony_ci ; 1848c2ecf20Sopenharmony_ci } 1858c2ecf20Sopenharmony_ci 1868c2ecf20Sopenharmony_ci out_status: 1878c2ecf20Sopenharmony_ci error = 0; 1888c2ecf20Sopenharmony_ci dlm_set_recover_status(ls, DLM_RS_DIR); 1898c2ecf20Sopenharmony_ci 1908c2ecf20Sopenharmony_ci log_rinfo(ls, "dlm_recover_directory %u in %u new", 1918c2ecf20Sopenharmony_ci count, count_add); 1928c2ecf20Sopenharmony_ci out_free: 1938c2ecf20Sopenharmony_ci kfree(last_name); 1948c2ecf20Sopenharmony_ci out: 1958c2ecf20Sopenharmony_ci return error; 1968c2ecf20Sopenharmony_ci} 1978c2ecf20Sopenharmony_ci 1988c2ecf20Sopenharmony_cistatic struct dlm_rsb *find_rsb_root(struct dlm_ls *ls, char *name, int len) 1998c2ecf20Sopenharmony_ci{ 2008c2ecf20Sopenharmony_ci struct dlm_rsb *r; 2018c2ecf20Sopenharmony_ci uint32_t hash, bucket; 2028c2ecf20Sopenharmony_ci int rv; 2038c2ecf20Sopenharmony_ci 2048c2ecf20Sopenharmony_ci hash = jhash(name, len, 0); 2058c2ecf20Sopenharmony_ci bucket = hash & (ls->ls_rsbtbl_size - 1); 2068c2ecf20Sopenharmony_ci 2078c2ecf20Sopenharmony_ci spin_lock(&ls->ls_rsbtbl[bucket].lock); 2088c2ecf20Sopenharmony_ci rv = dlm_search_rsb_tree(&ls->ls_rsbtbl[bucket].keep, name, len, &r); 2098c2ecf20Sopenharmony_ci if (rv) 2108c2ecf20Sopenharmony_ci rv = dlm_search_rsb_tree(&ls->ls_rsbtbl[bucket].toss, 2118c2ecf20Sopenharmony_ci name, len, &r); 2128c2ecf20Sopenharmony_ci spin_unlock(&ls->ls_rsbtbl[bucket].lock); 2138c2ecf20Sopenharmony_ci 2148c2ecf20Sopenharmony_ci if (!rv) 2158c2ecf20Sopenharmony_ci return r; 2168c2ecf20Sopenharmony_ci 2178c2ecf20Sopenharmony_ci down_read(&ls->ls_root_sem); 2188c2ecf20Sopenharmony_ci list_for_each_entry(r, &ls->ls_root_list, res_root_list) { 2198c2ecf20Sopenharmony_ci if (len == r->res_length && !memcmp(name, r->res_name, len)) { 2208c2ecf20Sopenharmony_ci up_read(&ls->ls_root_sem); 2218c2ecf20Sopenharmony_ci log_debug(ls, "find_rsb_root revert to root_list %s", 2228c2ecf20Sopenharmony_ci r->res_name); 2238c2ecf20Sopenharmony_ci return r; 2248c2ecf20Sopenharmony_ci } 2258c2ecf20Sopenharmony_ci } 2268c2ecf20Sopenharmony_ci up_read(&ls->ls_root_sem); 2278c2ecf20Sopenharmony_ci return NULL; 2288c2ecf20Sopenharmony_ci} 2298c2ecf20Sopenharmony_ci 2308c2ecf20Sopenharmony_ci/* Find the rsb where we left off (or start again), then send rsb names 2318c2ecf20Sopenharmony_ci for rsb's we're master of and whose directory node matches the requesting 2328c2ecf20Sopenharmony_ci node. inbuf is the rsb name last sent, inlen is the name's length */ 2338c2ecf20Sopenharmony_ci 2348c2ecf20Sopenharmony_civoid dlm_copy_master_names(struct dlm_ls *ls, char *inbuf, int inlen, 2358c2ecf20Sopenharmony_ci char *outbuf, int outlen, int nodeid) 2368c2ecf20Sopenharmony_ci{ 2378c2ecf20Sopenharmony_ci struct list_head *list; 2388c2ecf20Sopenharmony_ci struct dlm_rsb *r; 2398c2ecf20Sopenharmony_ci int offset = 0, dir_nodeid; 2408c2ecf20Sopenharmony_ci __be16 be_namelen; 2418c2ecf20Sopenharmony_ci 2428c2ecf20Sopenharmony_ci down_read(&ls->ls_root_sem); 2438c2ecf20Sopenharmony_ci 2448c2ecf20Sopenharmony_ci if (inlen > 1) { 2458c2ecf20Sopenharmony_ci r = find_rsb_root(ls, inbuf, inlen); 2468c2ecf20Sopenharmony_ci if (!r) { 2478c2ecf20Sopenharmony_ci inbuf[inlen - 1] = '\0'; 2488c2ecf20Sopenharmony_ci log_error(ls, "copy_master_names from %d start %d %s", 2498c2ecf20Sopenharmony_ci nodeid, inlen, inbuf); 2508c2ecf20Sopenharmony_ci goto out; 2518c2ecf20Sopenharmony_ci } 2528c2ecf20Sopenharmony_ci list = r->res_root_list.next; 2538c2ecf20Sopenharmony_ci } else { 2548c2ecf20Sopenharmony_ci list = ls->ls_root_list.next; 2558c2ecf20Sopenharmony_ci } 2568c2ecf20Sopenharmony_ci 2578c2ecf20Sopenharmony_ci for (offset = 0; list != &ls->ls_root_list; list = list->next) { 2588c2ecf20Sopenharmony_ci r = list_entry(list, struct dlm_rsb, res_root_list); 2598c2ecf20Sopenharmony_ci if (r->res_nodeid) 2608c2ecf20Sopenharmony_ci continue; 2618c2ecf20Sopenharmony_ci 2628c2ecf20Sopenharmony_ci dir_nodeid = dlm_dir_nodeid(r); 2638c2ecf20Sopenharmony_ci if (dir_nodeid != nodeid) 2648c2ecf20Sopenharmony_ci continue; 2658c2ecf20Sopenharmony_ci 2668c2ecf20Sopenharmony_ci /* 2678c2ecf20Sopenharmony_ci * The block ends when we can't fit the following in the 2688c2ecf20Sopenharmony_ci * remaining buffer space: 2698c2ecf20Sopenharmony_ci * namelen (uint16_t) + 2708c2ecf20Sopenharmony_ci * name (r->res_length) + 2718c2ecf20Sopenharmony_ci * end-of-block record 0x0000 (uint16_t) 2728c2ecf20Sopenharmony_ci */ 2738c2ecf20Sopenharmony_ci 2748c2ecf20Sopenharmony_ci if (offset + sizeof(uint16_t)*2 + r->res_length > outlen) { 2758c2ecf20Sopenharmony_ci /* Write end-of-block record */ 2768c2ecf20Sopenharmony_ci be_namelen = cpu_to_be16(0); 2778c2ecf20Sopenharmony_ci memcpy(outbuf + offset, &be_namelen, sizeof(__be16)); 2788c2ecf20Sopenharmony_ci offset += sizeof(__be16); 2798c2ecf20Sopenharmony_ci ls->ls_recover_dir_sent_msg++; 2808c2ecf20Sopenharmony_ci goto out; 2818c2ecf20Sopenharmony_ci } 2828c2ecf20Sopenharmony_ci 2838c2ecf20Sopenharmony_ci be_namelen = cpu_to_be16(r->res_length); 2848c2ecf20Sopenharmony_ci memcpy(outbuf + offset, &be_namelen, sizeof(__be16)); 2858c2ecf20Sopenharmony_ci offset += sizeof(__be16); 2868c2ecf20Sopenharmony_ci memcpy(outbuf + offset, r->res_name, r->res_length); 2878c2ecf20Sopenharmony_ci offset += r->res_length; 2888c2ecf20Sopenharmony_ci ls->ls_recover_dir_sent_res++; 2898c2ecf20Sopenharmony_ci } 2908c2ecf20Sopenharmony_ci 2918c2ecf20Sopenharmony_ci /* 2928c2ecf20Sopenharmony_ci * If we've reached the end of the list (and there's room) write a 2938c2ecf20Sopenharmony_ci * terminating record. 2948c2ecf20Sopenharmony_ci */ 2958c2ecf20Sopenharmony_ci 2968c2ecf20Sopenharmony_ci if ((list == &ls->ls_root_list) && 2978c2ecf20Sopenharmony_ci (offset + sizeof(uint16_t) <= outlen)) { 2988c2ecf20Sopenharmony_ci be_namelen = cpu_to_be16(0xFFFF); 2998c2ecf20Sopenharmony_ci memcpy(outbuf + offset, &be_namelen, sizeof(__be16)); 3008c2ecf20Sopenharmony_ci offset += sizeof(__be16); 3018c2ecf20Sopenharmony_ci ls->ls_recover_dir_sent_msg++; 3028c2ecf20Sopenharmony_ci } 3038c2ecf20Sopenharmony_ci out: 3048c2ecf20Sopenharmony_ci up_read(&ls->ls_root_sem); 3058c2ecf20Sopenharmony_ci} 3068c2ecf20Sopenharmony_ci 307