xref: /kernel/linux/linux-5.10/fs/ocfs2/slot_map.c (revision 8c2ecf20)
1// SPDX-License-Identifier: GPL-2.0-or-later
2/* -*- mode: c; c-basic-offset: 8; -*-
3 * vim: noexpandtab sw=8 ts=8 sts=0:
4 *
5 * slot_map.c
6 *
7 * Copyright (C) 2002, 2004 Oracle.  All rights reserved.
8 */
9
10#include <linux/types.h>
11#include <linux/slab.h>
12#include <linux/highmem.h>
13
14#include <cluster/masklog.h>
15
16#include "ocfs2.h"
17
18#include "dlmglue.h"
19#include "extent_map.h"
20#include "heartbeat.h"
21#include "inode.h"
22#include "slot_map.h"
23#include "super.h"
24#include "sysfile.h"
25#include "ocfs2_trace.h"
26
27#include "buffer_head_io.h"
28
29
/*
 * In-memory image of a single slot map entry.
 *
 * sl_valid is set to 1 by ocfs2_set_slot() and cleared to 0 by
 * ocfs2_invalidate_slot(); sl_node_num is only meaningful while
 * sl_valid is non-zero.
 */
struct ocfs2_slot {
	int sl_valid;			/* non-zero: slot is taken */
	unsigned int sl_node_num;	/* node number owning the slot */
};
34
35struct ocfs2_slot_info {
36	int si_extended;
37	int si_slots_per_block;
38	struct inode *si_inode;
39	unsigned int si_blocks;
40	struct buffer_head **si_bh;
41	unsigned int si_num_slots;
42	struct ocfs2_slot si_slots[];
43};
44
45
/* Forward declaration; the definition appears further down in this file. */
static int __ocfs2_node_num_to_slot(struct ocfs2_slot_info *si,
				    unsigned int node_num);
48
49static void ocfs2_invalidate_slot(struct ocfs2_slot_info *si,
50				  int slot_num)
51{
52	BUG_ON((slot_num < 0) || (slot_num >= si->si_num_slots));
53	si->si_slots[slot_num].sl_valid = 0;
54}
55
56static void ocfs2_set_slot(struct ocfs2_slot_info *si,
57			   int slot_num, unsigned int node_num)
58{
59	BUG_ON((slot_num < 0) || (slot_num >= si->si_num_slots));
60
61	si->si_slots[slot_num].sl_valid = 1;
62	si->si_slots[slot_num].sl_node_num = node_num;
63}
64
65/* This version is for the extended slot map */
66static void ocfs2_update_slot_info_extended(struct ocfs2_slot_info *si)
67{
68	int b, i, slotno;
69	struct ocfs2_slot_map_extended *se;
70
71	slotno = 0;
72	for (b = 0; b < si->si_blocks; b++) {
73		se = (struct ocfs2_slot_map_extended *)si->si_bh[b]->b_data;
74		for (i = 0;
75		     (i < si->si_slots_per_block) &&
76		     (slotno < si->si_num_slots);
77		     i++, slotno++) {
78			if (se->se_slots[i].es_valid)
79				ocfs2_set_slot(si, slotno,
80					       le32_to_cpu(se->se_slots[i].es_node_num));
81			else
82				ocfs2_invalidate_slot(si, slotno);
83		}
84	}
85}
86
87/*
88 * Post the slot information on disk into our slot_info struct.
89 * Must be protected by osb_lock.
90 */
91static void ocfs2_update_slot_info_old(struct ocfs2_slot_info *si)
92{
93	int i;
94	struct ocfs2_slot_map *sm;
95
96	sm = (struct ocfs2_slot_map *)si->si_bh[0]->b_data;
97
98	for (i = 0; i < si->si_num_slots; i++) {
99		if (le16_to_cpu(sm->sm_slots[i]) == (u16)OCFS2_INVALID_SLOT)
100			ocfs2_invalidate_slot(si, i);
101		else
102			ocfs2_set_slot(si, i, le16_to_cpu(sm->sm_slots[i]));
103	}
104}
105
106static void ocfs2_update_slot_info(struct ocfs2_slot_info *si)
107{
108	/*
109	 * The slot data will have been refreshed when ocfs2_super_lock
110	 * was taken.
111	 */
112	if (si->si_extended)
113		ocfs2_update_slot_info_extended(si);
114	else
115		ocfs2_update_slot_info_old(si);
116}
117
118int ocfs2_refresh_slot_info(struct ocfs2_super *osb)
119{
120	int ret;
121	struct ocfs2_slot_info *si = osb->slot_info;
122
123	if (si == NULL)
124		return 0;
125
126	BUG_ON(si->si_blocks == 0);
127	BUG_ON(si->si_bh == NULL);
128
129	trace_ocfs2_refresh_slot_info(si->si_blocks);
130
131	/*
132	 * We pass -1 as blocknr because we expect all of si->si_bh to
133	 * be !NULL.  Thus, ocfs2_read_blocks() will ignore blocknr.  If
134	 * this is not true, the read of -1 (UINT64_MAX) will fail.
135	 */
136	ret = ocfs2_read_blocks(INODE_CACHE(si->si_inode), -1, si->si_blocks,
137				si->si_bh, OCFS2_BH_IGNORE_CACHE, NULL);
138	if (ret == 0) {
139		spin_lock(&osb->osb_lock);
140		ocfs2_update_slot_info(si);
141		spin_unlock(&osb->osb_lock);
142	}
143
144	return ret;
145}
146
147/* post the our slot info stuff into it's destination bh and write it
148 * out. */
149static void ocfs2_update_disk_slot_extended(struct ocfs2_slot_info *si,
150					    int slot_num,
151					    struct buffer_head **bh)
152{
153	int blkind = slot_num / si->si_slots_per_block;
154	int slotno = slot_num % si->si_slots_per_block;
155	struct ocfs2_slot_map_extended *se;
156
157	BUG_ON(blkind >= si->si_blocks);
158
159	se = (struct ocfs2_slot_map_extended *)si->si_bh[blkind]->b_data;
160	se->se_slots[slotno].es_valid = si->si_slots[slot_num].sl_valid;
161	if (si->si_slots[slot_num].sl_valid)
162		se->se_slots[slotno].es_node_num =
163			cpu_to_le32(si->si_slots[slot_num].sl_node_num);
164	*bh = si->si_bh[blkind];
165}
166
167static void ocfs2_update_disk_slot_old(struct ocfs2_slot_info *si,
168				       int slot_num,
169				       struct buffer_head **bh)
170{
171	int i;
172	struct ocfs2_slot_map *sm;
173
174	sm = (struct ocfs2_slot_map *)si->si_bh[0]->b_data;
175	for (i = 0; i < si->si_num_slots; i++) {
176		if (si->si_slots[i].sl_valid)
177			sm->sm_slots[i] =
178				cpu_to_le16(si->si_slots[i].sl_node_num);
179		else
180			sm->sm_slots[i] = cpu_to_le16(OCFS2_INVALID_SLOT);
181	}
182	*bh = si->si_bh[0];
183}
184
185static int ocfs2_update_disk_slot(struct ocfs2_super *osb,
186				  struct ocfs2_slot_info *si,
187				  int slot_num)
188{
189	int status;
190	struct buffer_head *bh;
191
192	spin_lock(&osb->osb_lock);
193	if (si->si_extended)
194		ocfs2_update_disk_slot_extended(si, slot_num, &bh);
195	else
196		ocfs2_update_disk_slot_old(si, slot_num, &bh);
197	spin_unlock(&osb->osb_lock);
198
199	status = ocfs2_write_block(osb, bh, INODE_CACHE(si->si_inode));
200	if (status < 0)
201		mlog_errno(status);
202
203	return status;
204}
205
206/*
207 * Calculate how many bytes are needed by the slot map.  Returns
208 * an error if the slot map file is too small.
209 */
210static int ocfs2_slot_map_physical_size(struct ocfs2_super *osb,
211					struct inode *inode,
212					unsigned long long *bytes)
213{
214	unsigned long long bytes_needed;
215
216	if (ocfs2_uses_extended_slot_map(osb)) {
217		bytes_needed = osb->max_slots *
218			sizeof(struct ocfs2_extended_slot);
219	} else {
220		bytes_needed = osb->max_slots * sizeof(__le16);
221	}
222	if (bytes_needed > i_size_read(inode)) {
223		mlog(ML_ERROR,
224		     "Slot map file is too small!  (size %llu, needed %llu)\n",
225		     i_size_read(inode), bytes_needed);
226		return -ENOSPC;
227	}
228
229	*bytes = bytes_needed;
230	return 0;
231}
232
233/* try to find global node in the slot info. Returns -ENOENT
234 * if nothing is found. */
235static int __ocfs2_node_num_to_slot(struct ocfs2_slot_info *si,
236				    unsigned int node_num)
237{
238	int i, ret = -ENOENT;
239
240	for(i = 0; i < si->si_num_slots; i++) {
241		if (si->si_slots[i].sl_valid &&
242		    (node_num == si->si_slots[i].sl_node_num)) {
243			ret = i;
244			break;
245		}
246	}
247
248	return ret;
249}
250
251static int __ocfs2_find_empty_slot(struct ocfs2_slot_info *si,
252				   int preferred)
253{
254	int i, ret = -ENOSPC;
255
256	if ((preferred >= 0) && (preferred < si->si_num_slots)) {
257		if (!si->si_slots[preferred].sl_valid) {
258			ret = preferred;
259			goto out;
260		}
261	}
262
263	for(i = 0; i < si->si_num_slots; i++) {
264		if (!si->si_slots[i].sl_valid) {
265			ret = i;
266			break;
267		}
268	}
269out:
270	return ret;
271}
272
273int ocfs2_node_num_to_slot(struct ocfs2_super *osb, unsigned int node_num)
274{
275	int slot;
276	struct ocfs2_slot_info *si = osb->slot_info;
277
278	spin_lock(&osb->osb_lock);
279	slot = __ocfs2_node_num_to_slot(si, node_num);
280	spin_unlock(&osb->osb_lock);
281
282	return slot;
283}
284
285int ocfs2_slot_to_node_num_locked(struct ocfs2_super *osb, int slot_num,
286				  unsigned int *node_num)
287{
288	struct ocfs2_slot_info *si = osb->slot_info;
289
290	assert_spin_locked(&osb->osb_lock);
291
292	BUG_ON(slot_num < 0);
293	BUG_ON(slot_num >= osb->max_slots);
294
295	if (!si->si_slots[slot_num].sl_valid)
296		return -ENOENT;
297
298	*node_num = si->si_slots[slot_num].sl_node_num;
299	return 0;
300}
301
302static void __ocfs2_free_slot_info(struct ocfs2_slot_info *si)
303{
304	unsigned int i;
305
306	if (si == NULL)
307		return;
308
309	iput(si->si_inode);
310	if (si->si_bh) {
311		for (i = 0; i < si->si_blocks; i++) {
312			if (si->si_bh[i]) {
313				brelse(si->si_bh[i]);
314				si->si_bh[i] = NULL;
315			}
316		}
317		kfree(si->si_bh);
318	}
319
320	kfree(si);
321}
322
323int ocfs2_clear_slot(struct ocfs2_super *osb, int slot_num)
324{
325	struct ocfs2_slot_info *si = osb->slot_info;
326
327	if (si == NULL)
328		return 0;
329
330	spin_lock(&osb->osb_lock);
331	ocfs2_invalidate_slot(si, slot_num);
332	spin_unlock(&osb->osb_lock);
333
334	return ocfs2_update_disk_slot(osb, osb->slot_info, slot_num);
335}
336
337static int ocfs2_map_slot_buffers(struct ocfs2_super *osb,
338				  struct ocfs2_slot_info *si)
339{
340	int status = 0;
341	u64 blkno;
342	unsigned long long blocks, bytes = 0;
343	unsigned int i;
344	struct buffer_head *bh;
345
346	status = ocfs2_slot_map_physical_size(osb, si->si_inode, &bytes);
347	if (status)
348		goto bail;
349
350	blocks = ocfs2_blocks_for_bytes(si->si_inode->i_sb, bytes);
351	BUG_ON(blocks > UINT_MAX);
352	si->si_blocks = blocks;
353	if (!si->si_blocks)
354		goto bail;
355
356	if (si->si_extended)
357		si->si_slots_per_block =
358			(osb->sb->s_blocksize /
359			 sizeof(struct ocfs2_extended_slot));
360	else
361		si->si_slots_per_block = osb->sb->s_blocksize / sizeof(__le16);
362
363	/* The size checks above should ensure this */
364	BUG_ON((osb->max_slots / si->si_slots_per_block) > blocks);
365
366	trace_ocfs2_map_slot_buffers(bytes, si->si_blocks);
367
368	si->si_bh = kcalloc(si->si_blocks, sizeof(struct buffer_head *),
369			    GFP_KERNEL);
370	if (!si->si_bh) {
371		status = -ENOMEM;
372		mlog_errno(status);
373		goto bail;
374	}
375
376	for (i = 0; i < si->si_blocks; i++) {
377		status = ocfs2_extent_map_get_blocks(si->si_inode, i,
378						     &blkno, NULL, NULL);
379		if (status < 0) {
380			mlog_errno(status);
381			goto bail;
382		}
383
384		trace_ocfs2_map_slot_buffers_block((unsigned long long)blkno, i);
385
386		bh = NULL;  /* Acquire a fresh bh */
387		status = ocfs2_read_blocks(INODE_CACHE(si->si_inode), blkno,
388					   1, &bh, OCFS2_BH_IGNORE_CACHE, NULL);
389		if (status < 0) {
390			mlog_errno(status);
391			goto bail;
392		}
393
394		si->si_bh[i] = bh;
395	}
396
397bail:
398	return status;
399}
400
401int ocfs2_init_slot_info(struct ocfs2_super *osb)
402{
403	int status;
404	struct inode *inode = NULL;
405	struct ocfs2_slot_info *si;
406
407	si = kzalloc(struct_size(si, si_slots, osb->max_slots), GFP_KERNEL);
408	if (!si) {
409		status = -ENOMEM;
410		mlog_errno(status);
411		return status;
412	}
413
414	si->si_extended = ocfs2_uses_extended_slot_map(osb);
415	si->si_num_slots = osb->max_slots;
416
417	inode = ocfs2_get_system_file_inode(osb, SLOT_MAP_SYSTEM_INODE,
418					    OCFS2_INVALID_SLOT);
419	if (!inode) {
420		status = -EINVAL;
421		mlog_errno(status);
422		goto bail;
423	}
424
425	si->si_inode = inode;
426	status = ocfs2_map_slot_buffers(osb, si);
427	if (status < 0) {
428		mlog_errno(status);
429		goto bail;
430	}
431
432	osb->slot_info = (struct ocfs2_slot_info *)si;
433bail:
434	if (status < 0)
435		__ocfs2_free_slot_info(si);
436
437	return status;
438}
439
440void ocfs2_free_slot_info(struct ocfs2_super *osb)
441{
442	struct ocfs2_slot_info *si = osb->slot_info;
443
444	osb->slot_info = NULL;
445	__ocfs2_free_slot_info(si);
446}
447
448int ocfs2_find_slot(struct ocfs2_super *osb)
449{
450	int status;
451	int slot;
452	struct ocfs2_slot_info *si;
453
454	si = osb->slot_info;
455
456	spin_lock(&osb->osb_lock);
457	ocfs2_update_slot_info(si);
458
459	/* search for ourselves first and take the slot if it already
460	 * exists. Perhaps we need to mark this in a variable for our
461	 * own journal recovery? Possibly not, though we certainly
462	 * need to warn to the user */
463	slot = __ocfs2_node_num_to_slot(si, osb->node_num);
464	if (slot < 0) {
465		/* if no slot yet, then just take 1st available
466		 * one. */
467		slot = __ocfs2_find_empty_slot(si, osb->preferred_slot);
468		if (slot < 0) {
469			spin_unlock(&osb->osb_lock);
470			mlog(ML_ERROR, "no free slots available!\n");
471			status = -EINVAL;
472			goto bail;
473		}
474	} else
475		printk(KERN_INFO "ocfs2: Slot %d on device (%s) was already "
476		       "allocated to this node!\n", slot, osb->dev_str);
477
478	ocfs2_set_slot(si, slot, osb->node_num);
479	osb->slot_num = slot;
480	spin_unlock(&osb->osb_lock);
481
482	trace_ocfs2_find_slot(osb->slot_num);
483
484	status = ocfs2_update_disk_slot(osb, si, osb->slot_num);
485	if (status < 0) {
486		mlog_errno(status);
487		/*
488		 * if write block failed, invalidate slot to avoid overwrite
489		 * slot during dismount in case another node rightly has mounted
490		 */
491		spin_lock(&osb->osb_lock);
492		ocfs2_invalidate_slot(si, osb->slot_num);
493		osb->slot_num = OCFS2_INVALID_SLOT;
494		spin_unlock(&osb->osb_lock);
495	}
496
497bail:
498	return status;
499}
500
501void ocfs2_put_slot(struct ocfs2_super *osb)
502{
503	int status, slot_num;
504	struct ocfs2_slot_info *si = osb->slot_info;
505
506	if (!si)
507		return;
508
509	spin_lock(&osb->osb_lock);
510	ocfs2_update_slot_info(si);
511
512	slot_num = osb->slot_num;
513	ocfs2_invalidate_slot(si, osb->slot_num);
514	osb->slot_num = OCFS2_INVALID_SLOT;
515	spin_unlock(&osb->osb_lock);
516
517	status = ocfs2_update_disk_slot(osb, si, slot_num);
518	if (status < 0)
519		mlog_errno(status);
520
521	ocfs2_free_slot_info(osb);
522}
523