162306a36Sopenharmony_ci// SPDX-License-Identifier: GPL-2.0-only 262306a36Sopenharmony_ci/****************************************************************************** 362306a36Sopenharmony_ci******************************************************************************* 462306a36Sopenharmony_ci** 562306a36Sopenharmony_ci** Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved. 662306a36Sopenharmony_ci** Copyright (C) 2004-2005 Red Hat, Inc. All rights reserved. 762306a36Sopenharmony_ci** 862306a36Sopenharmony_ci** 962306a36Sopenharmony_ci******************************************************************************* 1062306a36Sopenharmony_ci******************************************************************************/ 1162306a36Sopenharmony_ci 1262306a36Sopenharmony_ci#include "dlm_internal.h" 1362306a36Sopenharmony_ci#include "lockspace.h" 1462306a36Sopenharmony_ci#include "member.h" 1562306a36Sopenharmony_ci#include "lowcomms.h" 1662306a36Sopenharmony_ci#include "rcom.h" 1762306a36Sopenharmony_ci#include "config.h" 1862306a36Sopenharmony_ci#include "memory.h" 1962306a36Sopenharmony_ci#include "recover.h" 2062306a36Sopenharmony_ci#include "util.h" 2162306a36Sopenharmony_ci#include "lock.h" 2262306a36Sopenharmony_ci#include "dir.h" 2362306a36Sopenharmony_ci 2462306a36Sopenharmony_ci/* 2562306a36Sopenharmony_ci * We use the upper 16 bits of the hash value to select the directory node. 2662306a36Sopenharmony_ci * Low bits are used for distribution of rsb's among hash buckets on each node. 2762306a36Sopenharmony_ci * 2862306a36Sopenharmony_ci * To give the exact range wanted (0 to num_nodes-1), we apply a modulus of 2962306a36Sopenharmony_ci * num_nodes to the hash value. This value in the desired range is used as an 3062306a36Sopenharmony_ci * offset into the sorted list of nodeid's to give the particular nodeid. 3162306a36Sopenharmony_ci */ 3262306a36Sopenharmony_ci 3362306a36Sopenharmony_ciint dlm_hash2nodeid(struct dlm_ls *ls, uint32_t hash) 3462306a36Sopenharmony_ci{ 3562306a36Sopenharmony_ci uint32_t node; 3662306a36Sopenharmony_ci 3762306a36Sopenharmony_ci if (ls->ls_num_nodes == 1) 3862306a36Sopenharmony_ci return dlm_our_nodeid(); 3962306a36Sopenharmony_ci else { 4062306a36Sopenharmony_ci node = (hash >> 16) % ls->ls_total_weight; 4162306a36Sopenharmony_ci return ls->ls_node_array[node]; 4262306a36Sopenharmony_ci } 4362306a36Sopenharmony_ci} 4462306a36Sopenharmony_ci 4562306a36Sopenharmony_ciint dlm_dir_nodeid(struct dlm_rsb *r) 4662306a36Sopenharmony_ci{ 4762306a36Sopenharmony_ci return r->res_dir_nodeid; 4862306a36Sopenharmony_ci} 4962306a36Sopenharmony_ci 5062306a36Sopenharmony_civoid dlm_recover_dir_nodeid(struct dlm_ls *ls) 5162306a36Sopenharmony_ci{ 5262306a36Sopenharmony_ci struct dlm_rsb *r; 5362306a36Sopenharmony_ci 5462306a36Sopenharmony_ci down_read(&ls->ls_root_sem); 5562306a36Sopenharmony_ci list_for_each_entry(r, &ls->ls_root_list, res_root_list) { 5662306a36Sopenharmony_ci r->res_dir_nodeid = dlm_hash2nodeid(ls, r->res_hash); 5762306a36Sopenharmony_ci } 5862306a36Sopenharmony_ci up_read(&ls->ls_root_sem); 5962306a36Sopenharmony_ci} 6062306a36Sopenharmony_ci 6162306a36Sopenharmony_ciint dlm_recover_directory(struct dlm_ls *ls, uint64_t seq) 6262306a36Sopenharmony_ci{ 6362306a36Sopenharmony_ci struct dlm_member *memb; 6462306a36Sopenharmony_ci char *b, *last_name = NULL; 6562306a36Sopenharmony_ci int error = -ENOMEM, last_len, nodeid, result; 6662306a36Sopenharmony_ci uint16_t namelen; 6762306a36Sopenharmony_ci unsigned int count = 0, count_match = 0, count_bad = 0, count_add = 0; 6862306a36Sopenharmony_ci 6962306a36Sopenharmony_ci log_rinfo(ls, "dlm_recover_directory"); 7062306a36Sopenharmony_ci 7162306a36Sopenharmony_ci if (dlm_no_directory(ls)) 7262306a36Sopenharmony_ci goto out_status; 7362306a36Sopenharmony_ci 7462306a36Sopenharmony_ci last_name = kmalloc(DLM_RESNAME_MAXLEN, GFP_NOFS); 7562306a36Sopenharmony_ci if (!last_name) 7662306a36Sopenharmony_ci goto out; 7762306a36Sopenharmony_ci 7862306a36Sopenharmony_ci list_for_each_entry(memb, &ls->ls_nodes, list) { 7962306a36Sopenharmony_ci if (memb->nodeid == dlm_our_nodeid()) 8062306a36Sopenharmony_ci continue; 8162306a36Sopenharmony_ci 8262306a36Sopenharmony_ci memset(last_name, 0, DLM_RESNAME_MAXLEN); 8362306a36Sopenharmony_ci last_len = 0; 8462306a36Sopenharmony_ci 8562306a36Sopenharmony_ci for (;;) { 8662306a36Sopenharmony_ci int left; 8762306a36Sopenharmony_ci if (dlm_recovery_stopped(ls)) { 8862306a36Sopenharmony_ci error = -EINTR; 8962306a36Sopenharmony_ci goto out_free; 9062306a36Sopenharmony_ci } 9162306a36Sopenharmony_ci 9262306a36Sopenharmony_ci error = dlm_rcom_names(ls, memb->nodeid, 9362306a36Sopenharmony_ci last_name, last_len, seq); 9462306a36Sopenharmony_ci if (error) 9562306a36Sopenharmony_ci goto out_free; 9662306a36Sopenharmony_ci 9762306a36Sopenharmony_ci cond_resched(); 9862306a36Sopenharmony_ci 9962306a36Sopenharmony_ci /* 10062306a36Sopenharmony_ci * pick namelen/name pairs out of received buffer 10162306a36Sopenharmony_ci */ 10262306a36Sopenharmony_ci 10362306a36Sopenharmony_ci b = ls->ls_recover_buf->rc_buf; 10462306a36Sopenharmony_ci left = le16_to_cpu(ls->ls_recover_buf->rc_header.h_length); 10562306a36Sopenharmony_ci left -= sizeof(struct dlm_rcom); 10662306a36Sopenharmony_ci 10762306a36Sopenharmony_ci for (;;) { 10862306a36Sopenharmony_ci __be16 v; 10962306a36Sopenharmony_ci 11062306a36Sopenharmony_ci error = -EINVAL; 11162306a36Sopenharmony_ci if (left < sizeof(__be16)) 11262306a36Sopenharmony_ci goto out_free; 11362306a36Sopenharmony_ci 11462306a36Sopenharmony_ci memcpy(&v, b, sizeof(__be16)); 11562306a36Sopenharmony_ci namelen = be16_to_cpu(v); 11662306a36Sopenharmony_ci b += sizeof(__be16); 11762306a36Sopenharmony_ci left -= sizeof(__be16); 11862306a36Sopenharmony_ci 11962306a36Sopenharmony_ci /* namelen of 0xFFFFF marks end of names for 12062306a36Sopenharmony_ci this node; namelen of 0 marks end of the 12162306a36Sopenharmony_ci buffer */ 12262306a36Sopenharmony_ci 12362306a36Sopenharmony_ci if (namelen == 0xFFFF) 12462306a36Sopenharmony_ci goto done; 12562306a36Sopenharmony_ci if (!namelen) 12662306a36Sopenharmony_ci break; 12762306a36Sopenharmony_ci 12862306a36Sopenharmony_ci if (namelen > left) 12962306a36Sopenharmony_ci goto out_free; 13062306a36Sopenharmony_ci 13162306a36Sopenharmony_ci if (namelen > DLM_RESNAME_MAXLEN) 13262306a36Sopenharmony_ci goto out_free; 13362306a36Sopenharmony_ci 13462306a36Sopenharmony_ci error = dlm_master_lookup(ls, memb->nodeid, 13562306a36Sopenharmony_ci b, namelen, 13662306a36Sopenharmony_ci DLM_LU_RECOVER_DIR, 13762306a36Sopenharmony_ci &nodeid, &result); 13862306a36Sopenharmony_ci if (error) { 13962306a36Sopenharmony_ci log_error(ls, "recover_dir lookup %d", 14062306a36Sopenharmony_ci error); 14162306a36Sopenharmony_ci goto out_free; 14262306a36Sopenharmony_ci } 14362306a36Sopenharmony_ci 14462306a36Sopenharmony_ci /* The name was found in rsbtbl, but the 14562306a36Sopenharmony_ci * master nodeid is different from 14662306a36Sopenharmony_ci * memb->nodeid which says it is the master. 14762306a36Sopenharmony_ci * This should not happen. */ 14862306a36Sopenharmony_ci 14962306a36Sopenharmony_ci if (result == DLM_LU_MATCH && 15062306a36Sopenharmony_ci nodeid != memb->nodeid) { 15162306a36Sopenharmony_ci count_bad++; 15262306a36Sopenharmony_ci log_error(ls, "recover_dir lookup %d " 15362306a36Sopenharmony_ci "nodeid %d memb %d bad %u", 15462306a36Sopenharmony_ci result, nodeid, memb->nodeid, 15562306a36Sopenharmony_ci count_bad); 15662306a36Sopenharmony_ci print_hex_dump_bytes("dlm_recover_dir ", 15762306a36Sopenharmony_ci DUMP_PREFIX_NONE, 15862306a36Sopenharmony_ci b, namelen); 15962306a36Sopenharmony_ci } 16062306a36Sopenharmony_ci 16162306a36Sopenharmony_ci /* The name was found in rsbtbl, and the 16262306a36Sopenharmony_ci * master nodeid matches memb->nodeid. */ 16362306a36Sopenharmony_ci 16462306a36Sopenharmony_ci if (result == DLM_LU_MATCH && 16562306a36Sopenharmony_ci nodeid == memb->nodeid) { 16662306a36Sopenharmony_ci count_match++; 16762306a36Sopenharmony_ci } 16862306a36Sopenharmony_ci 16962306a36Sopenharmony_ci /* The name was not found in rsbtbl and was 17062306a36Sopenharmony_ci * added with memb->nodeid as the master. */ 17162306a36Sopenharmony_ci 17262306a36Sopenharmony_ci if (result == DLM_LU_ADD) { 17362306a36Sopenharmony_ci count_add++; 17462306a36Sopenharmony_ci } 17562306a36Sopenharmony_ci 17662306a36Sopenharmony_ci last_len = namelen; 17762306a36Sopenharmony_ci memcpy(last_name, b, namelen); 17862306a36Sopenharmony_ci b += namelen; 17962306a36Sopenharmony_ci left -= namelen; 18062306a36Sopenharmony_ci count++; 18162306a36Sopenharmony_ci } 18262306a36Sopenharmony_ci } 18362306a36Sopenharmony_ci done: 18462306a36Sopenharmony_ci ; 18562306a36Sopenharmony_ci } 18662306a36Sopenharmony_ci 18762306a36Sopenharmony_ci out_status: 18862306a36Sopenharmony_ci error = 0; 18962306a36Sopenharmony_ci dlm_set_recover_status(ls, DLM_RS_DIR); 19062306a36Sopenharmony_ci 19162306a36Sopenharmony_ci log_rinfo(ls, "dlm_recover_directory %u in %u new", 19262306a36Sopenharmony_ci count, count_add); 19362306a36Sopenharmony_ci out_free: 19462306a36Sopenharmony_ci kfree(last_name); 19562306a36Sopenharmony_ci out: 19662306a36Sopenharmony_ci return error; 19762306a36Sopenharmony_ci} 19862306a36Sopenharmony_ci 19962306a36Sopenharmony_cistatic struct dlm_rsb *find_rsb_root(struct dlm_ls *ls, const char *name, 20062306a36Sopenharmony_ci int len) 20162306a36Sopenharmony_ci{ 20262306a36Sopenharmony_ci struct dlm_rsb *r; 20362306a36Sopenharmony_ci uint32_t hash, bucket; 20462306a36Sopenharmony_ci int rv; 20562306a36Sopenharmony_ci 20662306a36Sopenharmony_ci hash = jhash(name, len, 0); 20762306a36Sopenharmony_ci bucket = hash & (ls->ls_rsbtbl_size - 1); 20862306a36Sopenharmony_ci 20962306a36Sopenharmony_ci spin_lock(&ls->ls_rsbtbl[bucket].lock); 21062306a36Sopenharmony_ci rv = dlm_search_rsb_tree(&ls->ls_rsbtbl[bucket].keep, name, len, &r); 21162306a36Sopenharmony_ci if (rv) 21262306a36Sopenharmony_ci rv = dlm_search_rsb_tree(&ls->ls_rsbtbl[bucket].toss, 21362306a36Sopenharmony_ci name, len, &r); 21462306a36Sopenharmony_ci spin_unlock(&ls->ls_rsbtbl[bucket].lock); 21562306a36Sopenharmony_ci 21662306a36Sopenharmony_ci if (!rv) 21762306a36Sopenharmony_ci return r; 21862306a36Sopenharmony_ci 21962306a36Sopenharmony_ci down_read(&ls->ls_root_sem); 22062306a36Sopenharmony_ci list_for_each_entry(r, &ls->ls_root_list, res_root_list) { 22162306a36Sopenharmony_ci if (len == r->res_length && !memcmp(name, r->res_name, len)) { 22262306a36Sopenharmony_ci up_read(&ls->ls_root_sem); 22362306a36Sopenharmony_ci log_debug(ls, "find_rsb_root revert to root_list %s", 22462306a36Sopenharmony_ci r->res_name); 22562306a36Sopenharmony_ci return r; 22662306a36Sopenharmony_ci } 22762306a36Sopenharmony_ci } 22862306a36Sopenharmony_ci up_read(&ls->ls_root_sem); 22962306a36Sopenharmony_ci return NULL; 23062306a36Sopenharmony_ci} 23162306a36Sopenharmony_ci 23262306a36Sopenharmony_ci/* Find the rsb where we left off (or start again), then send rsb names 23362306a36Sopenharmony_ci for rsb's we're master of and whose directory node matches the requesting 23462306a36Sopenharmony_ci node. inbuf is the rsb name last sent, inlen is the name's length */ 23562306a36Sopenharmony_ci 23662306a36Sopenharmony_civoid dlm_copy_master_names(struct dlm_ls *ls, const char *inbuf, int inlen, 23762306a36Sopenharmony_ci char *outbuf, int outlen, int nodeid) 23862306a36Sopenharmony_ci{ 23962306a36Sopenharmony_ci struct list_head *list; 24062306a36Sopenharmony_ci struct dlm_rsb *r; 24162306a36Sopenharmony_ci int offset = 0, dir_nodeid; 24262306a36Sopenharmony_ci __be16 be_namelen; 24362306a36Sopenharmony_ci 24462306a36Sopenharmony_ci down_read(&ls->ls_root_sem); 24562306a36Sopenharmony_ci 24662306a36Sopenharmony_ci if (inlen > 1) { 24762306a36Sopenharmony_ci r = find_rsb_root(ls, inbuf, inlen); 24862306a36Sopenharmony_ci if (!r) { 24962306a36Sopenharmony_ci log_error(ls, "copy_master_names from %d start %d %.*s", 25062306a36Sopenharmony_ci nodeid, inlen, inlen, inbuf); 25162306a36Sopenharmony_ci goto out; 25262306a36Sopenharmony_ci } 25362306a36Sopenharmony_ci list = r->res_root_list.next; 25462306a36Sopenharmony_ci } else { 25562306a36Sopenharmony_ci list = ls->ls_root_list.next; 25662306a36Sopenharmony_ci } 25762306a36Sopenharmony_ci 25862306a36Sopenharmony_ci for (offset = 0; list != &ls->ls_root_list; list = list->next) { 25962306a36Sopenharmony_ci r = list_entry(list, struct dlm_rsb, res_root_list); 26062306a36Sopenharmony_ci if (r->res_nodeid) 26162306a36Sopenharmony_ci continue; 26262306a36Sopenharmony_ci 26362306a36Sopenharmony_ci dir_nodeid = dlm_dir_nodeid(r); 26462306a36Sopenharmony_ci if (dir_nodeid != nodeid) 26562306a36Sopenharmony_ci continue; 26662306a36Sopenharmony_ci 26762306a36Sopenharmony_ci /* 26862306a36Sopenharmony_ci * The block ends when we can't fit the following in the 26962306a36Sopenharmony_ci * remaining buffer space: 27062306a36Sopenharmony_ci * namelen (uint16_t) + 27162306a36Sopenharmony_ci * name (r->res_length) + 27262306a36Sopenharmony_ci * end-of-block record 0x0000 (uint16_t) 27362306a36Sopenharmony_ci */ 27462306a36Sopenharmony_ci 27562306a36Sopenharmony_ci if (offset + sizeof(uint16_t)*2 + r->res_length > outlen) { 27662306a36Sopenharmony_ci /* Write end-of-block record */ 27762306a36Sopenharmony_ci be_namelen = cpu_to_be16(0); 27862306a36Sopenharmony_ci memcpy(outbuf + offset, &be_namelen, sizeof(__be16)); 27962306a36Sopenharmony_ci offset += sizeof(__be16); 28062306a36Sopenharmony_ci ls->ls_recover_dir_sent_msg++; 28162306a36Sopenharmony_ci goto out; 28262306a36Sopenharmony_ci } 28362306a36Sopenharmony_ci 28462306a36Sopenharmony_ci be_namelen = cpu_to_be16(r->res_length); 28562306a36Sopenharmony_ci memcpy(outbuf + offset, &be_namelen, sizeof(__be16)); 28662306a36Sopenharmony_ci offset += sizeof(__be16); 28762306a36Sopenharmony_ci memcpy(outbuf + offset, r->res_name, r->res_length); 28862306a36Sopenharmony_ci offset += r->res_length; 28962306a36Sopenharmony_ci ls->ls_recover_dir_sent_res++; 29062306a36Sopenharmony_ci } 29162306a36Sopenharmony_ci 29262306a36Sopenharmony_ci /* 29362306a36Sopenharmony_ci * If we've reached the end of the list (and there's room) write a 29462306a36Sopenharmony_ci * terminating record. 29562306a36Sopenharmony_ci */ 29662306a36Sopenharmony_ci 29762306a36Sopenharmony_ci if ((list == &ls->ls_root_list) && 29862306a36Sopenharmony_ci (offset + sizeof(uint16_t) <= outlen)) { 29962306a36Sopenharmony_ci be_namelen = cpu_to_be16(0xFFFF); 30062306a36Sopenharmony_ci memcpy(outbuf + offset, &be_namelen, sizeof(__be16)); 30162306a36Sopenharmony_ci offset += sizeof(__be16); 30262306a36Sopenharmony_ci ls->ls_recover_dir_sent_msg++; 30362306a36Sopenharmony_ci } 30462306a36Sopenharmony_ci out: 30562306a36Sopenharmony_ci up_read(&ls->ls_root_sem); 30662306a36Sopenharmony_ci} 30762306a36Sopenharmony_ci 308