18c2ecf20Sopenharmony_ci// SPDX-License-Identifier: GPL-2.0-only
28c2ecf20Sopenharmony_ci/******************************************************************************
38c2ecf20Sopenharmony_ci*******************************************************************************
48c2ecf20Sopenharmony_ci**
58c2ecf20Sopenharmony_ci**  Copyright (C) Sistina Software, Inc.  1997-2003  All rights reserved.
68c2ecf20Sopenharmony_ci**  Copyright (C) 2004-2005 Red Hat, Inc.  All rights reserved.
78c2ecf20Sopenharmony_ci**
88c2ecf20Sopenharmony_ci**
98c2ecf20Sopenharmony_ci*******************************************************************************
108c2ecf20Sopenharmony_ci******************************************************************************/
118c2ecf20Sopenharmony_ci
128c2ecf20Sopenharmony_ci#include "dlm_internal.h"
138c2ecf20Sopenharmony_ci#include "lockspace.h"
148c2ecf20Sopenharmony_ci#include "member.h"
158c2ecf20Sopenharmony_ci#include "lowcomms.h"
168c2ecf20Sopenharmony_ci#include "rcom.h"
178c2ecf20Sopenharmony_ci#include "config.h"
188c2ecf20Sopenharmony_ci#include "memory.h"
198c2ecf20Sopenharmony_ci#include "recover.h"
208c2ecf20Sopenharmony_ci#include "util.h"
218c2ecf20Sopenharmony_ci#include "lock.h"
228c2ecf20Sopenharmony_ci#include "dir.h"
238c2ecf20Sopenharmony_ci
248c2ecf20Sopenharmony_ci/*
258c2ecf20Sopenharmony_ci * We use the upper 16 bits of the hash value to select the directory node.
268c2ecf20Sopenharmony_ci * Low bits are used for distribution of rsb's among hash buckets on each node.
278c2ecf20Sopenharmony_ci *
288c2ecf20Sopenharmony_ci * To give the exact range wanted (0 to num_nodes-1), we apply a modulus of
298c2ecf20Sopenharmony_ci * num_nodes to the hash value.  This value in the desired range is used as an
308c2ecf20Sopenharmony_ci * offset into the sorted list of nodeid's to give the particular nodeid.
318c2ecf20Sopenharmony_ci */
328c2ecf20Sopenharmony_ci
338c2ecf20Sopenharmony_ciint dlm_hash2nodeid(struct dlm_ls *ls, uint32_t hash)
348c2ecf20Sopenharmony_ci{
358c2ecf20Sopenharmony_ci	uint32_t node;
368c2ecf20Sopenharmony_ci
378c2ecf20Sopenharmony_ci	if (ls->ls_num_nodes == 1)
388c2ecf20Sopenharmony_ci		return dlm_our_nodeid();
398c2ecf20Sopenharmony_ci	else {
408c2ecf20Sopenharmony_ci		node = (hash >> 16) % ls->ls_total_weight;
418c2ecf20Sopenharmony_ci		return ls->ls_node_array[node];
428c2ecf20Sopenharmony_ci	}
438c2ecf20Sopenharmony_ci}
448c2ecf20Sopenharmony_ci
458c2ecf20Sopenharmony_ciint dlm_dir_nodeid(struct dlm_rsb *r)
468c2ecf20Sopenharmony_ci{
478c2ecf20Sopenharmony_ci	return r->res_dir_nodeid;
488c2ecf20Sopenharmony_ci}
498c2ecf20Sopenharmony_ci
508c2ecf20Sopenharmony_civoid dlm_recover_dir_nodeid(struct dlm_ls *ls)
518c2ecf20Sopenharmony_ci{
528c2ecf20Sopenharmony_ci	struct dlm_rsb *r;
538c2ecf20Sopenharmony_ci
548c2ecf20Sopenharmony_ci	down_read(&ls->ls_root_sem);
558c2ecf20Sopenharmony_ci	list_for_each_entry(r, &ls->ls_root_list, res_root_list) {
568c2ecf20Sopenharmony_ci		r->res_dir_nodeid = dlm_hash2nodeid(ls, r->res_hash);
578c2ecf20Sopenharmony_ci	}
588c2ecf20Sopenharmony_ci	up_read(&ls->ls_root_sem);
598c2ecf20Sopenharmony_ci}
608c2ecf20Sopenharmony_ci
618c2ecf20Sopenharmony_ciint dlm_recover_directory(struct dlm_ls *ls)
628c2ecf20Sopenharmony_ci{
638c2ecf20Sopenharmony_ci	struct dlm_member *memb;
648c2ecf20Sopenharmony_ci	char *b, *last_name = NULL;
658c2ecf20Sopenharmony_ci	int error = -ENOMEM, last_len, nodeid, result;
668c2ecf20Sopenharmony_ci	uint16_t namelen;
678c2ecf20Sopenharmony_ci	unsigned int count = 0, count_match = 0, count_bad = 0, count_add = 0;
688c2ecf20Sopenharmony_ci
698c2ecf20Sopenharmony_ci	log_rinfo(ls, "dlm_recover_directory");
708c2ecf20Sopenharmony_ci
718c2ecf20Sopenharmony_ci	if (dlm_no_directory(ls))
728c2ecf20Sopenharmony_ci		goto out_status;
738c2ecf20Sopenharmony_ci
748c2ecf20Sopenharmony_ci	last_name = kmalloc(DLM_RESNAME_MAXLEN, GFP_NOFS);
758c2ecf20Sopenharmony_ci	if (!last_name)
768c2ecf20Sopenharmony_ci		goto out;
778c2ecf20Sopenharmony_ci
788c2ecf20Sopenharmony_ci	list_for_each_entry(memb, &ls->ls_nodes, list) {
798c2ecf20Sopenharmony_ci		if (memb->nodeid == dlm_our_nodeid())
808c2ecf20Sopenharmony_ci			continue;
818c2ecf20Sopenharmony_ci
828c2ecf20Sopenharmony_ci		memset(last_name, 0, DLM_RESNAME_MAXLEN);
838c2ecf20Sopenharmony_ci		last_len = 0;
848c2ecf20Sopenharmony_ci
858c2ecf20Sopenharmony_ci		for (;;) {
868c2ecf20Sopenharmony_ci			int left;
878c2ecf20Sopenharmony_ci			error = dlm_recovery_stopped(ls);
888c2ecf20Sopenharmony_ci			if (error)
898c2ecf20Sopenharmony_ci				goto out_free;
908c2ecf20Sopenharmony_ci
918c2ecf20Sopenharmony_ci			error = dlm_rcom_names(ls, memb->nodeid,
928c2ecf20Sopenharmony_ci					       last_name, last_len);
938c2ecf20Sopenharmony_ci			if (error)
948c2ecf20Sopenharmony_ci				goto out_free;
958c2ecf20Sopenharmony_ci
968c2ecf20Sopenharmony_ci			cond_resched();
978c2ecf20Sopenharmony_ci
988c2ecf20Sopenharmony_ci			/*
998c2ecf20Sopenharmony_ci			 * pick namelen/name pairs out of received buffer
1008c2ecf20Sopenharmony_ci			 */
1018c2ecf20Sopenharmony_ci
1028c2ecf20Sopenharmony_ci			b = ls->ls_recover_buf->rc_buf;
1038c2ecf20Sopenharmony_ci			left = ls->ls_recover_buf->rc_header.h_length;
1048c2ecf20Sopenharmony_ci			left -= sizeof(struct dlm_rcom);
1058c2ecf20Sopenharmony_ci
1068c2ecf20Sopenharmony_ci			for (;;) {
1078c2ecf20Sopenharmony_ci				__be16 v;
1088c2ecf20Sopenharmony_ci
1098c2ecf20Sopenharmony_ci				error = -EINVAL;
1108c2ecf20Sopenharmony_ci				if (left < sizeof(__be16))
1118c2ecf20Sopenharmony_ci					goto out_free;
1128c2ecf20Sopenharmony_ci
1138c2ecf20Sopenharmony_ci				memcpy(&v, b, sizeof(__be16));
1148c2ecf20Sopenharmony_ci				namelen = be16_to_cpu(v);
1158c2ecf20Sopenharmony_ci				b += sizeof(__be16);
1168c2ecf20Sopenharmony_ci				left -= sizeof(__be16);
1178c2ecf20Sopenharmony_ci
1188c2ecf20Sopenharmony_ci				/* namelen of 0xFFFFF marks end of names for
1198c2ecf20Sopenharmony_ci				   this node; namelen of 0 marks end of the
1208c2ecf20Sopenharmony_ci				   buffer */
1218c2ecf20Sopenharmony_ci
1228c2ecf20Sopenharmony_ci				if (namelen == 0xFFFF)
1238c2ecf20Sopenharmony_ci					goto done;
1248c2ecf20Sopenharmony_ci				if (!namelen)
1258c2ecf20Sopenharmony_ci					break;
1268c2ecf20Sopenharmony_ci
1278c2ecf20Sopenharmony_ci				if (namelen > left)
1288c2ecf20Sopenharmony_ci					goto out_free;
1298c2ecf20Sopenharmony_ci
1308c2ecf20Sopenharmony_ci				if (namelen > DLM_RESNAME_MAXLEN)
1318c2ecf20Sopenharmony_ci					goto out_free;
1328c2ecf20Sopenharmony_ci
1338c2ecf20Sopenharmony_ci				error = dlm_master_lookup(ls, memb->nodeid,
1348c2ecf20Sopenharmony_ci							  b, namelen,
1358c2ecf20Sopenharmony_ci							  DLM_LU_RECOVER_DIR,
1368c2ecf20Sopenharmony_ci							  &nodeid, &result);
1378c2ecf20Sopenharmony_ci				if (error) {
1388c2ecf20Sopenharmony_ci					log_error(ls, "recover_dir lookup %d",
1398c2ecf20Sopenharmony_ci						  error);
1408c2ecf20Sopenharmony_ci					goto out_free;
1418c2ecf20Sopenharmony_ci				}
1428c2ecf20Sopenharmony_ci
1438c2ecf20Sopenharmony_ci				/* The name was found in rsbtbl, but the
1448c2ecf20Sopenharmony_ci				 * master nodeid is different from
1458c2ecf20Sopenharmony_ci				 * memb->nodeid which says it is the master.
1468c2ecf20Sopenharmony_ci				 * This should not happen. */
1478c2ecf20Sopenharmony_ci
1488c2ecf20Sopenharmony_ci				if (result == DLM_LU_MATCH &&
1498c2ecf20Sopenharmony_ci				    nodeid != memb->nodeid) {
1508c2ecf20Sopenharmony_ci					count_bad++;
1518c2ecf20Sopenharmony_ci					log_error(ls, "recover_dir lookup %d "
1528c2ecf20Sopenharmony_ci						  "nodeid %d memb %d bad %u",
1538c2ecf20Sopenharmony_ci						  result, nodeid, memb->nodeid,
1548c2ecf20Sopenharmony_ci						  count_bad);
1558c2ecf20Sopenharmony_ci					print_hex_dump_bytes("dlm_recover_dir ",
1568c2ecf20Sopenharmony_ci							     DUMP_PREFIX_NONE,
1578c2ecf20Sopenharmony_ci							     b, namelen);
1588c2ecf20Sopenharmony_ci				}
1598c2ecf20Sopenharmony_ci
1608c2ecf20Sopenharmony_ci				/* The name was found in rsbtbl, and the
1618c2ecf20Sopenharmony_ci				 * master nodeid matches memb->nodeid. */
1628c2ecf20Sopenharmony_ci
1638c2ecf20Sopenharmony_ci				if (result == DLM_LU_MATCH &&
1648c2ecf20Sopenharmony_ci				    nodeid == memb->nodeid) {
1658c2ecf20Sopenharmony_ci					count_match++;
1668c2ecf20Sopenharmony_ci				}
1678c2ecf20Sopenharmony_ci
1688c2ecf20Sopenharmony_ci				/* The name was not found in rsbtbl and was
1698c2ecf20Sopenharmony_ci				 * added with memb->nodeid as the master. */
1708c2ecf20Sopenharmony_ci
1718c2ecf20Sopenharmony_ci				if (result == DLM_LU_ADD) {
1728c2ecf20Sopenharmony_ci					count_add++;
1738c2ecf20Sopenharmony_ci				}
1748c2ecf20Sopenharmony_ci
1758c2ecf20Sopenharmony_ci				last_len = namelen;
1768c2ecf20Sopenharmony_ci				memcpy(last_name, b, namelen);
1778c2ecf20Sopenharmony_ci				b += namelen;
1788c2ecf20Sopenharmony_ci				left -= namelen;
1798c2ecf20Sopenharmony_ci				count++;
1808c2ecf20Sopenharmony_ci			}
1818c2ecf20Sopenharmony_ci		}
1828c2ecf20Sopenharmony_ci	 done:
1838c2ecf20Sopenharmony_ci		;
1848c2ecf20Sopenharmony_ci	}
1858c2ecf20Sopenharmony_ci
1868c2ecf20Sopenharmony_ci out_status:
1878c2ecf20Sopenharmony_ci	error = 0;
1888c2ecf20Sopenharmony_ci	dlm_set_recover_status(ls, DLM_RS_DIR);
1898c2ecf20Sopenharmony_ci
1908c2ecf20Sopenharmony_ci	log_rinfo(ls, "dlm_recover_directory %u in %u new",
1918c2ecf20Sopenharmony_ci		  count, count_add);
1928c2ecf20Sopenharmony_ci out_free:
1938c2ecf20Sopenharmony_ci	kfree(last_name);
1948c2ecf20Sopenharmony_ci out:
1958c2ecf20Sopenharmony_ci	return error;
1968c2ecf20Sopenharmony_ci}
1978c2ecf20Sopenharmony_ci
1988c2ecf20Sopenharmony_cistatic struct dlm_rsb *find_rsb_root(struct dlm_ls *ls, char *name, int len)
1998c2ecf20Sopenharmony_ci{
2008c2ecf20Sopenharmony_ci	struct dlm_rsb *r;
2018c2ecf20Sopenharmony_ci	uint32_t hash, bucket;
2028c2ecf20Sopenharmony_ci	int rv;
2038c2ecf20Sopenharmony_ci
2048c2ecf20Sopenharmony_ci	hash = jhash(name, len, 0);
2058c2ecf20Sopenharmony_ci	bucket = hash & (ls->ls_rsbtbl_size - 1);
2068c2ecf20Sopenharmony_ci
2078c2ecf20Sopenharmony_ci	spin_lock(&ls->ls_rsbtbl[bucket].lock);
2088c2ecf20Sopenharmony_ci	rv = dlm_search_rsb_tree(&ls->ls_rsbtbl[bucket].keep, name, len, &r);
2098c2ecf20Sopenharmony_ci	if (rv)
2108c2ecf20Sopenharmony_ci		rv = dlm_search_rsb_tree(&ls->ls_rsbtbl[bucket].toss,
2118c2ecf20Sopenharmony_ci					 name, len, &r);
2128c2ecf20Sopenharmony_ci	spin_unlock(&ls->ls_rsbtbl[bucket].lock);
2138c2ecf20Sopenharmony_ci
2148c2ecf20Sopenharmony_ci	if (!rv)
2158c2ecf20Sopenharmony_ci		return r;
2168c2ecf20Sopenharmony_ci
2178c2ecf20Sopenharmony_ci	down_read(&ls->ls_root_sem);
2188c2ecf20Sopenharmony_ci	list_for_each_entry(r, &ls->ls_root_list, res_root_list) {
2198c2ecf20Sopenharmony_ci		if (len == r->res_length && !memcmp(name, r->res_name, len)) {
2208c2ecf20Sopenharmony_ci			up_read(&ls->ls_root_sem);
2218c2ecf20Sopenharmony_ci			log_debug(ls, "find_rsb_root revert to root_list %s",
2228c2ecf20Sopenharmony_ci				  r->res_name);
2238c2ecf20Sopenharmony_ci			return r;
2248c2ecf20Sopenharmony_ci		}
2258c2ecf20Sopenharmony_ci	}
2268c2ecf20Sopenharmony_ci	up_read(&ls->ls_root_sem);
2278c2ecf20Sopenharmony_ci	return NULL;
2288c2ecf20Sopenharmony_ci}
2298c2ecf20Sopenharmony_ci
2308c2ecf20Sopenharmony_ci/* Find the rsb where we left off (or start again), then send rsb names
2318c2ecf20Sopenharmony_ci   for rsb's we're master of and whose directory node matches the requesting
2328c2ecf20Sopenharmony_ci   node.  inbuf is the rsb name last sent, inlen is the name's length */
2338c2ecf20Sopenharmony_ci
2348c2ecf20Sopenharmony_civoid dlm_copy_master_names(struct dlm_ls *ls, char *inbuf, int inlen,
2358c2ecf20Sopenharmony_ci 			   char *outbuf, int outlen, int nodeid)
2368c2ecf20Sopenharmony_ci{
2378c2ecf20Sopenharmony_ci	struct list_head *list;
2388c2ecf20Sopenharmony_ci	struct dlm_rsb *r;
2398c2ecf20Sopenharmony_ci	int offset = 0, dir_nodeid;
2408c2ecf20Sopenharmony_ci	__be16 be_namelen;
2418c2ecf20Sopenharmony_ci
2428c2ecf20Sopenharmony_ci	down_read(&ls->ls_root_sem);
2438c2ecf20Sopenharmony_ci
2448c2ecf20Sopenharmony_ci	if (inlen > 1) {
2458c2ecf20Sopenharmony_ci		r = find_rsb_root(ls, inbuf, inlen);
2468c2ecf20Sopenharmony_ci		if (!r) {
2478c2ecf20Sopenharmony_ci			inbuf[inlen - 1] = '\0';
2488c2ecf20Sopenharmony_ci			log_error(ls, "copy_master_names from %d start %d %s",
2498c2ecf20Sopenharmony_ci				  nodeid, inlen, inbuf);
2508c2ecf20Sopenharmony_ci			goto out;
2518c2ecf20Sopenharmony_ci		}
2528c2ecf20Sopenharmony_ci		list = r->res_root_list.next;
2538c2ecf20Sopenharmony_ci	} else {
2548c2ecf20Sopenharmony_ci		list = ls->ls_root_list.next;
2558c2ecf20Sopenharmony_ci	}
2568c2ecf20Sopenharmony_ci
2578c2ecf20Sopenharmony_ci	for (offset = 0; list != &ls->ls_root_list; list = list->next) {
2588c2ecf20Sopenharmony_ci		r = list_entry(list, struct dlm_rsb, res_root_list);
2598c2ecf20Sopenharmony_ci		if (r->res_nodeid)
2608c2ecf20Sopenharmony_ci			continue;
2618c2ecf20Sopenharmony_ci
2628c2ecf20Sopenharmony_ci		dir_nodeid = dlm_dir_nodeid(r);
2638c2ecf20Sopenharmony_ci		if (dir_nodeid != nodeid)
2648c2ecf20Sopenharmony_ci			continue;
2658c2ecf20Sopenharmony_ci
2668c2ecf20Sopenharmony_ci		/*
2678c2ecf20Sopenharmony_ci		 * The block ends when we can't fit the following in the
2688c2ecf20Sopenharmony_ci		 * remaining buffer space:
2698c2ecf20Sopenharmony_ci		 * namelen (uint16_t) +
2708c2ecf20Sopenharmony_ci		 * name (r->res_length) +
2718c2ecf20Sopenharmony_ci		 * end-of-block record 0x0000 (uint16_t)
2728c2ecf20Sopenharmony_ci		 */
2738c2ecf20Sopenharmony_ci
2748c2ecf20Sopenharmony_ci		if (offset + sizeof(uint16_t)*2 + r->res_length > outlen) {
2758c2ecf20Sopenharmony_ci			/* Write end-of-block record */
2768c2ecf20Sopenharmony_ci			be_namelen = cpu_to_be16(0);
2778c2ecf20Sopenharmony_ci			memcpy(outbuf + offset, &be_namelen, sizeof(__be16));
2788c2ecf20Sopenharmony_ci			offset += sizeof(__be16);
2798c2ecf20Sopenharmony_ci			ls->ls_recover_dir_sent_msg++;
2808c2ecf20Sopenharmony_ci			goto out;
2818c2ecf20Sopenharmony_ci		}
2828c2ecf20Sopenharmony_ci
2838c2ecf20Sopenharmony_ci		be_namelen = cpu_to_be16(r->res_length);
2848c2ecf20Sopenharmony_ci		memcpy(outbuf + offset, &be_namelen, sizeof(__be16));
2858c2ecf20Sopenharmony_ci		offset += sizeof(__be16);
2868c2ecf20Sopenharmony_ci		memcpy(outbuf + offset, r->res_name, r->res_length);
2878c2ecf20Sopenharmony_ci		offset += r->res_length;
2888c2ecf20Sopenharmony_ci		ls->ls_recover_dir_sent_res++;
2898c2ecf20Sopenharmony_ci	}
2908c2ecf20Sopenharmony_ci
2918c2ecf20Sopenharmony_ci	/*
2928c2ecf20Sopenharmony_ci	 * If we've reached the end of the list (and there's room) write a
2938c2ecf20Sopenharmony_ci	 * terminating record.
2948c2ecf20Sopenharmony_ci	 */
2958c2ecf20Sopenharmony_ci
2968c2ecf20Sopenharmony_ci	if ((list == &ls->ls_root_list) &&
2978c2ecf20Sopenharmony_ci	    (offset + sizeof(uint16_t) <= outlen)) {
2988c2ecf20Sopenharmony_ci		be_namelen = cpu_to_be16(0xFFFF);
2998c2ecf20Sopenharmony_ci		memcpy(outbuf + offset, &be_namelen, sizeof(__be16));
3008c2ecf20Sopenharmony_ci		offset += sizeof(__be16);
3018c2ecf20Sopenharmony_ci		ls->ls_recover_dir_sent_msg++;
3028c2ecf20Sopenharmony_ci	}
3038c2ecf20Sopenharmony_ci out:
3048c2ecf20Sopenharmony_ci	up_read(&ls->ls_root_sem);
3058c2ecf20Sopenharmony_ci}
3068c2ecf20Sopenharmony_ci
307