162306a36Sopenharmony_ci// SPDX-License-Identifier: GPL-2.0-only
262306a36Sopenharmony_ci/******************************************************************************
362306a36Sopenharmony_ci*******************************************************************************
462306a36Sopenharmony_ci**
562306a36Sopenharmony_ci**  Copyright (C) Sistina Software, Inc.  1997-2003  All rights reserved.
662306a36Sopenharmony_ci**  Copyright (C) 2004-2005 Red Hat, Inc.  All rights reserved.
762306a36Sopenharmony_ci**
862306a36Sopenharmony_ci**
962306a36Sopenharmony_ci*******************************************************************************
1062306a36Sopenharmony_ci******************************************************************************/
1162306a36Sopenharmony_ci
1262306a36Sopenharmony_ci#include "dlm_internal.h"
1362306a36Sopenharmony_ci#include "lockspace.h"
1462306a36Sopenharmony_ci#include "member.h"
1562306a36Sopenharmony_ci#include "lowcomms.h"
1662306a36Sopenharmony_ci#include "rcom.h"
1762306a36Sopenharmony_ci#include "config.h"
1862306a36Sopenharmony_ci#include "memory.h"
1962306a36Sopenharmony_ci#include "recover.h"
2062306a36Sopenharmony_ci#include "util.h"
2162306a36Sopenharmony_ci#include "lock.h"
2262306a36Sopenharmony_ci#include "dir.h"
2362306a36Sopenharmony_ci
/*
 * We use the upper 16 bits of the hash value to select the directory node.
 * Low bits are used for distribution of rsb's among hash buckets on each node.
 *
 * To give the exact range wanted (0 to ls_total_weight-1), we apply a modulus
 * of ls_total_weight to the hash value.  This value in the desired range is
 * used as an offset into the sorted list of nodeid's (ls_node_array) to give
 * the particular nodeid.
 */
3262306a36Sopenharmony_ci
3362306a36Sopenharmony_ciint dlm_hash2nodeid(struct dlm_ls *ls, uint32_t hash)
3462306a36Sopenharmony_ci{
3562306a36Sopenharmony_ci	uint32_t node;
3662306a36Sopenharmony_ci
3762306a36Sopenharmony_ci	if (ls->ls_num_nodes == 1)
3862306a36Sopenharmony_ci		return dlm_our_nodeid();
3962306a36Sopenharmony_ci	else {
4062306a36Sopenharmony_ci		node = (hash >> 16) % ls->ls_total_weight;
4162306a36Sopenharmony_ci		return ls->ls_node_array[node];
4262306a36Sopenharmony_ci	}
4362306a36Sopenharmony_ci}
4462306a36Sopenharmony_ci
4562306a36Sopenharmony_ciint dlm_dir_nodeid(struct dlm_rsb *r)
4662306a36Sopenharmony_ci{
4762306a36Sopenharmony_ci	return r->res_dir_nodeid;
4862306a36Sopenharmony_ci}
4962306a36Sopenharmony_ci
5062306a36Sopenharmony_civoid dlm_recover_dir_nodeid(struct dlm_ls *ls)
5162306a36Sopenharmony_ci{
5262306a36Sopenharmony_ci	struct dlm_rsb *r;
5362306a36Sopenharmony_ci
5462306a36Sopenharmony_ci	down_read(&ls->ls_root_sem);
5562306a36Sopenharmony_ci	list_for_each_entry(r, &ls->ls_root_list, res_root_list) {
5662306a36Sopenharmony_ci		r->res_dir_nodeid = dlm_hash2nodeid(ls, r->res_hash);
5762306a36Sopenharmony_ci	}
5862306a36Sopenharmony_ci	up_read(&ls->ls_root_sem);
5962306a36Sopenharmony_ci}
6062306a36Sopenharmony_ci
6162306a36Sopenharmony_ciint dlm_recover_directory(struct dlm_ls *ls, uint64_t seq)
6262306a36Sopenharmony_ci{
6362306a36Sopenharmony_ci	struct dlm_member *memb;
6462306a36Sopenharmony_ci	char *b, *last_name = NULL;
6562306a36Sopenharmony_ci	int error = -ENOMEM, last_len, nodeid, result;
6662306a36Sopenharmony_ci	uint16_t namelen;
6762306a36Sopenharmony_ci	unsigned int count = 0, count_match = 0, count_bad = 0, count_add = 0;
6862306a36Sopenharmony_ci
6962306a36Sopenharmony_ci	log_rinfo(ls, "dlm_recover_directory");
7062306a36Sopenharmony_ci
7162306a36Sopenharmony_ci	if (dlm_no_directory(ls))
7262306a36Sopenharmony_ci		goto out_status;
7362306a36Sopenharmony_ci
7462306a36Sopenharmony_ci	last_name = kmalloc(DLM_RESNAME_MAXLEN, GFP_NOFS);
7562306a36Sopenharmony_ci	if (!last_name)
7662306a36Sopenharmony_ci		goto out;
7762306a36Sopenharmony_ci
7862306a36Sopenharmony_ci	list_for_each_entry(memb, &ls->ls_nodes, list) {
7962306a36Sopenharmony_ci		if (memb->nodeid == dlm_our_nodeid())
8062306a36Sopenharmony_ci			continue;
8162306a36Sopenharmony_ci
8262306a36Sopenharmony_ci		memset(last_name, 0, DLM_RESNAME_MAXLEN);
8362306a36Sopenharmony_ci		last_len = 0;
8462306a36Sopenharmony_ci
8562306a36Sopenharmony_ci		for (;;) {
8662306a36Sopenharmony_ci			int left;
8762306a36Sopenharmony_ci			if (dlm_recovery_stopped(ls)) {
8862306a36Sopenharmony_ci				error = -EINTR;
8962306a36Sopenharmony_ci				goto out_free;
9062306a36Sopenharmony_ci			}
9162306a36Sopenharmony_ci
9262306a36Sopenharmony_ci			error = dlm_rcom_names(ls, memb->nodeid,
9362306a36Sopenharmony_ci					       last_name, last_len, seq);
9462306a36Sopenharmony_ci			if (error)
9562306a36Sopenharmony_ci				goto out_free;
9662306a36Sopenharmony_ci
9762306a36Sopenharmony_ci			cond_resched();
9862306a36Sopenharmony_ci
9962306a36Sopenharmony_ci			/*
10062306a36Sopenharmony_ci			 * pick namelen/name pairs out of received buffer
10162306a36Sopenharmony_ci			 */
10262306a36Sopenharmony_ci
10362306a36Sopenharmony_ci			b = ls->ls_recover_buf->rc_buf;
10462306a36Sopenharmony_ci			left = le16_to_cpu(ls->ls_recover_buf->rc_header.h_length);
10562306a36Sopenharmony_ci			left -= sizeof(struct dlm_rcom);
10662306a36Sopenharmony_ci
10762306a36Sopenharmony_ci			for (;;) {
10862306a36Sopenharmony_ci				__be16 v;
10962306a36Sopenharmony_ci
11062306a36Sopenharmony_ci				error = -EINVAL;
11162306a36Sopenharmony_ci				if (left < sizeof(__be16))
11262306a36Sopenharmony_ci					goto out_free;
11362306a36Sopenharmony_ci
11462306a36Sopenharmony_ci				memcpy(&v, b, sizeof(__be16));
11562306a36Sopenharmony_ci				namelen = be16_to_cpu(v);
11662306a36Sopenharmony_ci				b += sizeof(__be16);
11762306a36Sopenharmony_ci				left -= sizeof(__be16);
11862306a36Sopenharmony_ci
11962306a36Sopenharmony_ci				/* namelen of 0xFFFFF marks end of names for
12062306a36Sopenharmony_ci				   this node; namelen of 0 marks end of the
12162306a36Sopenharmony_ci				   buffer */
12262306a36Sopenharmony_ci
12362306a36Sopenharmony_ci				if (namelen == 0xFFFF)
12462306a36Sopenharmony_ci					goto done;
12562306a36Sopenharmony_ci				if (!namelen)
12662306a36Sopenharmony_ci					break;
12762306a36Sopenharmony_ci
12862306a36Sopenharmony_ci				if (namelen > left)
12962306a36Sopenharmony_ci					goto out_free;
13062306a36Sopenharmony_ci
13162306a36Sopenharmony_ci				if (namelen > DLM_RESNAME_MAXLEN)
13262306a36Sopenharmony_ci					goto out_free;
13362306a36Sopenharmony_ci
13462306a36Sopenharmony_ci				error = dlm_master_lookup(ls, memb->nodeid,
13562306a36Sopenharmony_ci							  b, namelen,
13662306a36Sopenharmony_ci							  DLM_LU_RECOVER_DIR,
13762306a36Sopenharmony_ci							  &nodeid, &result);
13862306a36Sopenharmony_ci				if (error) {
13962306a36Sopenharmony_ci					log_error(ls, "recover_dir lookup %d",
14062306a36Sopenharmony_ci						  error);
14162306a36Sopenharmony_ci					goto out_free;
14262306a36Sopenharmony_ci				}
14362306a36Sopenharmony_ci
14462306a36Sopenharmony_ci				/* The name was found in rsbtbl, but the
14562306a36Sopenharmony_ci				 * master nodeid is different from
14662306a36Sopenharmony_ci				 * memb->nodeid which says it is the master.
14762306a36Sopenharmony_ci				 * This should not happen. */
14862306a36Sopenharmony_ci
14962306a36Sopenharmony_ci				if (result == DLM_LU_MATCH &&
15062306a36Sopenharmony_ci				    nodeid != memb->nodeid) {
15162306a36Sopenharmony_ci					count_bad++;
15262306a36Sopenharmony_ci					log_error(ls, "recover_dir lookup %d "
15362306a36Sopenharmony_ci						  "nodeid %d memb %d bad %u",
15462306a36Sopenharmony_ci						  result, nodeid, memb->nodeid,
15562306a36Sopenharmony_ci						  count_bad);
15662306a36Sopenharmony_ci					print_hex_dump_bytes("dlm_recover_dir ",
15762306a36Sopenharmony_ci							     DUMP_PREFIX_NONE,
15862306a36Sopenharmony_ci							     b, namelen);
15962306a36Sopenharmony_ci				}
16062306a36Sopenharmony_ci
16162306a36Sopenharmony_ci				/* The name was found in rsbtbl, and the
16262306a36Sopenharmony_ci				 * master nodeid matches memb->nodeid. */
16362306a36Sopenharmony_ci
16462306a36Sopenharmony_ci				if (result == DLM_LU_MATCH &&
16562306a36Sopenharmony_ci				    nodeid == memb->nodeid) {
16662306a36Sopenharmony_ci					count_match++;
16762306a36Sopenharmony_ci				}
16862306a36Sopenharmony_ci
16962306a36Sopenharmony_ci				/* The name was not found in rsbtbl and was
17062306a36Sopenharmony_ci				 * added with memb->nodeid as the master. */
17162306a36Sopenharmony_ci
17262306a36Sopenharmony_ci				if (result == DLM_LU_ADD) {
17362306a36Sopenharmony_ci					count_add++;
17462306a36Sopenharmony_ci				}
17562306a36Sopenharmony_ci
17662306a36Sopenharmony_ci				last_len = namelen;
17762306a36Sopenharmony_ci				memcpy(last_name, b, namelen);
17862306a36Sopenharmony_ci				b += namelen;
17962306a36Sopenharmony_ci				left -= namelen;
18062306a36Sopenharmony_ci				count++;
18162306a36Sopenharmony_ci			}
18262306a36Sopenharmony_ci		}
18362306a36Sopenharmony_ci	 done:
18462306a36Sopenharmony_ci		;
18562306a36Sopenharmony_ci	}
18662306a36Sopenharmony_ci
18762306a36Sopenharmony_ci out_status:
18862306a36Sopenharmony_ci	error = 0;
18962306a36Sopenharmony_ci	dlm_set_recover_status(ls, DLM_RS_DIR);
19062306a36Sopenharmony_ci
19162306a36Sopenharmony_ci	log_rinfo(ls, "dlm_recover_directory %u in %u new",
19262306a36Sopenharmony_ci		  count, count_add);
19362306a36Sopenharmony_ci out_free:
19462306a36Sopenharmony_ci	kfree(last_name);
19562306a36Sopenharmony_ci out:
19662306a36Sopenharmony_ci	return error;
19762306a36Sopenharmony_ci}
19862306a36Sopenharmony_ci
19962306a36Sopenharmony_cistatic struct dlm_rsb *find_rsb_root(struct dlm_ls *ls, const char *name,
20062306a36Sopenharmony_ci				     int len)
20162306a36Sopenharmony_ci{
20262306a36Sopenharmony_ci	struct dlm_rsb *r;
20362306a36Sopenharmony_ci	uint32_t hash, bucket;
20462306a36Sopenharmony_ci	int rv;
20562306a36Sopenharmony_ci
20662306a36Sopenharmony_ci	hash = jhash(name, len, 0);
20762306a36Sopenharmony_ci	bucket = hash & (ls->ls_rsbtbl_size - 1);
20862306a36Sopenharmony_ci
20962306a36Sopenharmony_ci	spin_lock(&ls->ls_rsbtbl[bucket].lock);
21062306a36Sopenharmony_ci	rv = dlm_search_rsb_tree(&ls->ls_rsbtbl[bucket].keep, name, len, &r);
21162306a36Sopenharmony_ci	if (rv)
21262306a36Sopenharmony_ci		rv = dlm_search_rsb_tree(&ls->ls_rsbtbl[bucket].toss,
21362306a36Sopenharmony_ci					 name, len, &r);
21462306a36Sopenharmony_ci	spin_unlock(&ls->ls_rsbtbl[bucket].lock);
21562306a36Sopenharmony_ci
21662306a36Sopenharmony_ci	if (!rv)
21762306a36Sopenharmony_ci		return r;
21862306a36Sopenharmony_ci
21962306a36Sopenharmony_ci	down_read(&ls->ls_root_sem);
22062306a36Sopenharmony_ci	list_for_each_entry(r, &ls->ls_root_list, res_root_list) {
22162306a36Sopenharmony_ci		if (len == r->res_length && !memcmp(name, r->res_name, len)) {
22262306a36Sopenharmony_ci			up_read(&ls->ls_root_sem);
22362306a36Sopenharmony_ci			log_debug(ls, "find_rsb_root revert to root_list %s",
22462306a36Sopenharmony_ci				  r->res_name);
22562306a36Sopenharmony_ci			return r;
22662306a36Sopenharmony_ci		}
22762306a36Sopenharmony_ci	}
22862306a36Sopenharmony_ci	up_read(&ls->ls_root_sem);
22962306a36Sopenharmony_ci	return NULL;
23062306a36Sopenharmony_ci}
23162306a36Sopenharmony_ci
23262306a36Sopenharmony_ci/* Find the rsb where we left off (or start again), then send rsb names
23362306a36Sopenharmony_ci   for rsb's we're master of and whose directory node matches the requesting
23462306a36Sopenharmony_ci   node.  inbuf is the rsb name last sent, inlen is the name's length */
23562306a36Sopenharmony_ci
23662306a36Sopenharmony_civoid dlm_copy_master_names(struct dlm_ls *ls, const char *inbuf, int inlen,
23762306a36Sopenharmony_ci 			   char *outbuf, int outlen, int nodeid)
23862306a36Sopenharmony_ci{
23962306a36Sopenharmony_ci	struct list_head *list;
24062306a36Sopenharmony_ci	struct dlm_rsb *r;
24162306a36Sopenharmony_ci	int offset = 0, dir_nodeid;
24262306a36Sopenharmony_ci	__be16 be_namelen;
24362306a36Sopenharmony_ci
24462306a36Sopenharmony_ci	down_read(&ls->ls_root_sem);
24562306a36Sopenharmony_ci
24662306a36Sopenharmony_ci	if (inlen > 1) {
24762306a36Sopenharmony_ci		r = find_rsb_root(ls, inbuf, inlen);
24862306a36Sopenharmony_ci		if (!r) {
24962306a36Sopenharmony_ci			log_error(ls, "copy_master_names from %d start %d %.*s",
25062306a36Sopenharmony_ci				  nodeid, inlen, inlen, inbuf);
25162306a36Sopenharmony_ci			goto out;
25262306a36Sopenharmony_ci		}
25362306a36Sopenharmony_ci		list = r->res_root_list.next;
25462306a36Sopenharmony_ci	} else {
25562306a36Sopenharmony_ci		list = ls->ls_root_list.next;
25662306a36Sopenharmony_ci	}
25762306a36Sopenharmony_ci
25862306a36Sopenharmony_ci	for (offset = 0; list != &ls->ls_root_list; list = list->next) {
25962306a36Sopenharmony_ci		r = list_entry(list, struct dlm_rsb, res_root_list);
26062306a36Sopenharmony_ci		if (r->res_nodeid)
26162306a36Sopenharmony_ci			continue;
26262306a36Sopenharmony_ci
26362306a36Sopenharmony_ci		dir_nodeid = dlm_dir_nodeid(r);
26462306a36Sopenharmony_ci		if (dir_nodeid != nodeid)
26562306a36Sopenharmony_ci			continue;
26662306a36Sopenharmony_ci
26762306a36Sopenharmony_ci		/*
26862306a36Sopenharmony_ci		 * The block ends when we can't fit the following in the
26962306a36Sopenharmony_ci		 * remaining buffer space:
27062306a36Sopenharmony_ci		 * namelen (uint16_t) +
27162306a36Sopenharmony_ci		 * name (r->res_length) +
27262306a36Sopenharmony_ci		 * end-of-block record 0x0000 (uint16_t)
27362306a36Sopenharmony_ci		 */
27462306a36Sopenharmony_ci
27562306a36Sopenharmony_ci		if (offset + sizeof(uint16_t)*2 + r->res_length > outlen) {
27662306a36Sopenharmony_ci			/* Write end-of-block record */
27762306a36Sopenharmony_ci			be_namelen = cpu_to_be16(0);
27862306a36Sopenharmony_ci			memcpy(outbuf + offset, &be_namelen, sizeof(__be16));
27962306a36Sopenharmony_ci			offset += sizeof(__be16);
28062306a36Sopenharmony_ci			ls->ls_recover_dir_sent_msg++;
28162306a36Sopenharmony_ci			goto out;
28262306a36Sopenharmony_ci		}
28362306a36Sopenharmony_ci
28462306a36Sopenharmony_ci		be_namelen = cpu_to_be16(r->res_length);
28562306a36Sopenharmony_ci		memcpy(outbuf + offset, &be_namelen, sizeof(__be16));
28662306a36Sopenharmony_ci		offset += sizeof(__be16);
28762306a36Sopenharmony_ci		memcpy(outbuf + offset, r->res_name, r->res_length);
28862306a36Sopenharmony_ci		offset += r->res_length;
28962306a36Sopenharmony_ci		ls->ls_recover_dir_sent_res++;
29062306a36Sopenharmony_ci	}
29162306a36Sopenharmony_ci
29262306a36Sopenharmony_ci	/*
29362306a36Sopenharmony_ci	 * If we've reached the end of the list (and there's room) write a
29462306a36Sopenharmony_ci	 * terminating record.
29562306a36Sopenharmony_ci	 */
29662306a36Sopenharmony_ci
29762306a36Sopenharmony_ci	if ((list == &ls->ls_root_list) &&
29862306a36Sopenharmony_ci	    (offset + sizeof(uint16_t) <= outlen)) {
29962306a36Sopenharmony_ci		be_namelen = cpu_to_be16(0xFFFF);
30062306a36Sopenharmony_ci		memcpy(outbuf + offset, &be_namelen, sizeof(__be16));
30162306a36Sopenharmony_ci		offset += sizeof(__be16);
30262306a36Sopenharmony_ci		ls->ls_recover_dir_sent_msg++;
30362306a36Sopenharmony_ci	}
30462306a36Sopenharmony_ci out:
30562306a36Sopenharmony_ci	up_read(&ls->ls_root_sem);
30662306a36Sopenharmony_ci}
30762306a36Sopenharmony_ci
308