1// SPDX-License-Identifier: GPL-2.0
2/*
3 * Copyright (c) 2000-2002,2005 Silicon Graphics, Inc.
4 * Copyright (c) 2013 Red Hat, Inc.
5 * All Rights Reserved.
6 */
7#include "xfs.h"
8#include "xfs_fs.h"
9#include "xfs_shared.h"
10#include "xfs_format.h"
11#include "xfs_log_format.h"
12#include "xfs_trans_resv.h"
13#include "xfs_mount.h"
14#include "xfs_inode.h"
15#include "xfs_dir2.h"
16#include "xfs_dir2_priv.h"
17#include "xfs_error.h"
18#include "xfs_trans.h"
19#include "xfs_buf_item.h"
20#include "xfs_log.h"
21
/*
 * Forward declaration: __xfs_dir3_data_check() calls this helper, which is
 * defined further down in this file.
 */
static xfs_failaddr_t xfs_dir2_data_freefind_verify(
		struct xfs_dir2_data_hdr *hdr, struct xfs_dir2_data_free *bf,
		struct xfs_dir2_data_unused *dup,
		struct xfs_dir2_data_free **bf_ent);
26
27struct xfs_dir2_data_free *
28xfs_dir2_data_bestfree_p(
29	struct xfs_mount		*mp,
30	struct xfs_dir2_data_hdr	*hdr)
31{
32	if (xfs_has_crc(mp))
33		return ((struct xfs_dir3_data_hdr *)hdr)->best_free;
34	return hdr->bestfree;
35}
36
37/*
38 * Pointer to an entry's tag word.
39 */
40__be16 *
41xfs_dir2_data_entry_tag_p(
42	struct xfs_mount		*mp,
43	struct xfs_dir2_data_entry	*dep)
44{
45	return (__be16 *)((char *)dep +
46		xfs_dir2_data_entsize(mp, dep->namelen) - sizeof(__be16));
47}
48
49uint8_t
50xfs_dir2_data_get_ftype(
51	struct xfs_mount		*mp,
52	struct xfs_dir2_data_entry	*dep)
53{
54	if (xfs_has_ftype(mp)) {
55		uint8_t			ftype = dep->name[dep->namelen];
56
57		if (likely(ftype < XFS_DIR3_FT_MAX))
58			return ftype;
59	}
60
61	return XFS_DIR3_FT_UNKNOWN;
62}
63
64void
65xfs_dir2_data_put_ftype(
66	struct xfs_mount		*mp,
67	struct xfs_dir2_data_entry	*dep,
68	uint8_t				ftype)
69{
70	ASSERT(ftype < XFS_DIR3_FT_MAX);
71	ASSERT(dep->namelen != 0);
72
73	if (xfs_has_ftype(mp))
74		dep->name[dep->namelen] = ftype;
75}
76
77/*
78 * The number of leaf entries is limited by the size of the block and the amount
79 * of space used by the data entries.  We don't know how much space is used by
80 * the data entries yet, so just ensure that the count falls somewhere inside
81 * the block right now.
82 */
83static inline unsigned int
84xfs_dir2_data_max_leaf_entries(
85	struct xfs_da_geometry		*geo)
86{
87	return (geo->blksize - sizeof(struct xfs_dir2_block_tail) -
88		geo->data_entry_offset) /
89			sizeof(struct xfs_dir2_leaf_entry);
90}
91
92/*
93 * Check the consistency of the data block.
94 * The input can also be a block-format directory.
95 * Return NULL if the buffer is good, otherwise the address of the error.
96 */
97xfs_failaddr_t
98__xfs_dir3_data_check(
99	struct xfs_inode	*dp,		/* incore inode pointer */
100	struct xfs_buf		*bp)		/* data block's buffer */
101{
102	xfs_dir2_dataptr_t	addr;		/* addr for leaf lookup */
103	xfs_dir2_data_free_t	*bf;		/* bestfree table */
104	xfs_dir2_block_tail_t	*btp=NULL;	/* block tail */
105	int			count;		/* count of entries found */
106	xfs_dir2_data_hdr_t	*hdr;		/* data block header */
107	xfs_dir2_data_free_t	*dfp;		/* bestfree entry */
108	int			freeseen;	/* mask of bestfrees seen */
109	xfs_dahash_t		hash;		/* hash of current name */
110	int			i;		/* leaf index */
111	int			lastfree;	/* last entry was unused */
112	xfs_dir2_leaf_entry_t	*lep=NULL;	/* block leaf entries */
113	struct xfs_mount	*mp = bp->b_mount;
114	int			stale;		/* count of stale leaves */
115	struct xfs_name		name;
116	unsigned int		offset;
117	unsigned int		end;
118	struct xfs_da_geometry	*geo = mp->m_dir_geo;
119
120	/*
121	 * If this isn't a directory, something is seriously wrong.  Bail out.
122	 */
123	if (dp && !S_ISDIR(VFS_I(dp)->i_mode))
124		return __this_address;
125
126	hdr = bp->b_addr;
127	offset = geo->data_entry_offset;
128
129	switch (hdr->magic) {
130	case cpu_to_be32(XFS_DIR3_BLOCK_MAGIC):
131	case cpu_to_be32(XFS_DIR2_BLOCK_MAGIC):
132		btp = xfs_dir2_block_tail_p(geo, hdr);
133		lep = xfs_dir2_block_leaf_p(btp);
134
135		if (be32_to_cpu(btp->count) >=
136		    xfs_dir2_data_max_leaf_entries(geo))
137			return __this_address;
138		break;
139	case cpu_to_be32(XFS_DIR3_DATA_MAGIC):
140	case cpu_to_be32(XFS_DIR2_DATA_MAGIC):
141		break;
142	default:
143		return __this_address;
144	}
145	end = xfs_dir3_data_end_offset(geo, hdr);
146	if (!end)
147		return __this_address;
148
149	/*
150	 * Account for zero bestfree entries.
151	 */
152	bf = xfs_dir2_data_bestfree_p(mp, hdr);
153	count = lastfree = freeseen = 0;
154	if (!bf[0].length) {
155		if (bf[0].offset)
156			return __this_address;
157		freeseen |= 1 << 0;
158	}
159	if (!bf[1].length) {
160		if (bf[1].offset)
161			return __this_address;
162		freeseen |= 1 << 1;
163	}
164	if (!bf[2].length) {
165		if (bf[2].offset)
166			return __this_address;
167		freeseen |= 1 << 2;
168	}
169
170	if (be16_to_cpu(bf[0].length) < be16_to_cpu(bf[1].length))
171		return __this_address;
172	if (be16_to_cpu(bf[1].length) < be16_to_cpu(bf[2].length))
173		return __this_address;
174	/*
175	 * Loop over the data/unused entries.
176	 */
177	while (offset < end) {
178		struct xfs_dir2_data_unused	*dup = bp->b_addr + offset;
179		struct xfs_dir2_data_entry	*dep = bp->b_addr + offset;
180
181		/*
182		 * If it's unused, look for the space in the bestfree table.
183		 * If we find it, account for that, else make sure it
184		 * doesn't need to be there.
185		 */
186		if (be16_to_cpu(dup->freetag) == XFS_DIR2_DATA_FREE_TAG) {
187			xfs_failaddr_t	fa;
188
189			if (lastfree != 0)
190				return __this_address;
191			if (offset + be16_to_cpu(dup->length) > end)
192				return __this_address;
193			if (be16_to_cpu(*xfs_dir2_data_unused_tag_p(dup)) !=
194			    offset)
195				return __this_address;
196			fa = xfs_dir2_data_freefind_verify(hdr, bf, dup, &dfp);
197			if (fa)
198				return fa;
199			if (dfp) {
200				i = (int)(dfp - bf);
201				if ((freeseen & (1 << i)) != 0)
202					return __this_address;
203				freeseen |= 1 << i;
204			} else {
205				if (be16_to_cpu(dup->length) >
206				    be16_to_cpu(bf[2].length))
207					return __this_address;
208			}
209			offset += be16_to_cpu(dup->length);
210			lastfree = 1;
211			continue;
212		}
213		/*
214		 * It's a real entry.  Validate the fields.
215		 * If this is a block directory then make sure it's
216		 * in the leaf section of the block.
217		 * The linear search is crude but this is DEBUG code.
218		 */
219		if (dep->namelen == 0)
220			return __this_address;
221		if (!xfs_verify_dir_ino(mp, be64_to_cpu(dep->inumber)))
222			return __this_address;
223		if (offset + xfs_dir2_data_entsize(mp, dep->namelen) > end)
224			return __this_address;
225		if (be16_to_cpu(*xfs_dir2_data_entry_tag_p(mp, dep)) != offset)
226			return __this_address;
227		if (xfs_dir2_data_get_ftype(mp, dep) >= XFS_DIR3_FT_MAX)
228			return __this_address;
229		count++;
230		lastfree = 0;
231		if (hdr->magic == cpu_to_be32(XFS_DIR2_BLOCK_MAGIC) ||
232		    hdr->magic == cpu_to_be32(XFS_DIR3_BLOCK_MAGIC)) {
233			addr = xfs_dir2_db_off_to_dataptr(geo, geo->datablk,
234						(xfs_dir2_data_aoff_t)
235						((char *)dep - (char *)hdr));
236			name.name = dep->name;
237			name.len = dep->namelen;
238			hash = xfs_dir2_hashname(mp, &name);
239			for (i = 0; i < be32_to_cpu(btp->count); i++) {
240				if (be32_to_cpu(lep[i].address) == addr &&
241				    be32_to_cpu(lep[i].hashval) == hash)
242					break;
243			}
244			if (i >= be32_to_cpu(btp->count))
245				return __this_address;
246		}
247		offset += xfs_dir2_data_entsize(mp, dep->namelen);
248	}
249	/*
250	 * Need to have seen all the entries and all the bestfree slots.
251	 */
252	if (freeseen != 7)
253		return __this_address;
254	if (hdr->magic == cpu_to_be32(XFS_DIR2_BLOCK_MAGIC) ||
255	    hdr->magic == cpu_to_be32(XFS_DIR3_BLOCK_MAGIC)) {
256		for (i = stale = 0; i < be32_to_cpu(btp->count); i++) {
257			if (lep[i].address ==
258			    cpu_to_be32(XFS_DIR2_NULL_DATAPTR))
259				stale++;
260			if (i > 0 && be32_to_cpu(lep[i].hashval) <
261				     be32_to_cpu(lep[i - 1].hashval))
262				return __this_address;
263		}
264		if (count != be32_to_cpu(btp->count) - be32_to_cpu(btp->stale))
265			return __this_address;
266		if (stale != be32_to_cpu(btp->stale))
267			return __this_address;
268	}
269	return NULL;
270}
271
#ifdef DEBUG
/*
 * Debug-only wrapper around __xfs_dir3_data_check(): if the block fails the
 * check, report a corruption error for the buffer contents and assert.
 */
void
xfs_dir3_data_check(
	struct xfs_inode	*dp,
	struct xfs_buf		*bp)
{
	xfs_failaddr_t		fa = __xfs_dir3_data_check(dp, bp);

	if (fa) {
		xfs_corruption_error(__func__, XFS_ERRLEVEL_LOW, dp->i_mount,
				bp->b_addr, BBTOB(bp->b_length), __FILE__,
				__LINE__, fa);
		ASSERT(0);
	}
}
#endif
289
290static xfs_failaddr_t
291xfs_dir3_data_verify(
292	struct xfs_buf		*bp)
293{
294	struct xfs_mount	*mp = bp->b_mount;
295	struct xfs_dir3_blk_hdr	*hdr3 = bp->b_addr;
296
297	if (!xfs_verify_magic(bp, hdr3->magic))
298		return __this_address;
299
300	if (xfs_has_crc(mp)) {
301		if (!uuid_equal(&hdr3->uuid, &mp->m_sb.sb_meta_uuid))
302			return __this_address;
303		if (be64_to_cpu(hdr3->blkno) != xfs_buf_daddr(bp))
304			return __this_address;
305		if (!xfs_log_check_lsn(mp, be64_to_cpu(hdr3->lsn)))
306			return __this_address;
307	}
308	return __xfs_dir3_data_check(NULL, bp);
309}
310
311/*
312 * Readahead of the first block of the directory when it is opened is completely
313 * oblivious to the format of the directory. Hence we can either get a block
314 * format buffer or a data format buffer on readahead.
315 */
316static void
317xfs_dir3_data_reada_verify(
318	struct xfs_buf		*bp)
319{
320	struct xfs_dir2_data_hdr *hdr = bp->b_addr;
321
322	switch (hdr->magic) {
323	case cpu_to_be32(XFS_DIR2_BLOCK_MAGIC):
324	case cpu_to_be32(XFS_DIR3_BLOCK_MAGIC):
325		bp->b_ops = &xfs_dir3_block_buf_ops;
326		bp->b_ops->verify_read(bp);
327		return;
328	case cpu_to_be32(XFS_DIR2_DATA_MAGIC):
329	case cpu_to_be32(XFS_DIR3_DATA_MAGIC):
330		bp->b_ops = &xfs_dir3_data_buf_ops;
331		bp->b_ops->verify_read(bp);
332		return;
333	default:
334		xfs_verifier_error(bp, -EFSCORRUPTED, __this_address);
335		break;
336	}
337}
338
339static void
340xfs_dir3_data_read_verify(
341	struct xfs_buf	*bp)
342{
343	struct xfs_mount	*mp = bp->b_mount;
344	xfs_failaddr_t		fa;
345
346	if (xfs_has_crc(mp) &&
347	    !xfs_buf_verify_cksum(bp, XFS_DIR3_DATA_CRC_OFF))
348		xfs_verifier_error(bp, -EFSBADCRC, __this_address);
349	else {
350		fa = xfs_dir3_data_verify(bp);
351		if (fa)
352			xfs_verifier_error(bp, -EFSCORRUPTED, fa);
353	}
354}
355
356static void
357xfs_dir3_data_write_verify(
358	struct xfs_buf	*bp)
359{
360	struct xfs_mount	*mp = bp->b_mount;
361	struct xfs_buf_log_item	*bip = bp->b_log_item;
362	struct xfs_dir3_blk_hdr	*hdr3 = bp->b_addr;
363	xfs_failaddr_t		fa;
364
365	fa = xfs_dir3_data_verify(bp);
366	if (fa) {
367		xfs_verifier_error(bp, -EFSCORRUPTED, fa);
368		return;
369	}
370
371	if (!xfs_has_crc(mp))
372		return;
373
374	if (bip)
375		hdr3->lsn = cpu_to_be64(bip->bli_item.li_lsn);
376
377	xfs_buf_update_cksum(bp, XFS_DIR3_DATA_CRC_OFF);
378}
379
380const struct xfs_buf_ops xfs_dir3_data_buf_ops = {
381	.name = "xfs_dir3_data",
382	.magic = { cpu_to_be32(XFS_DIR2_DATA_MAGIC),
383		   cpu_to_be32(XFS_DIR3_DATA_MAGIC) },
384	.verify_read = xfs_dir3_data_read_verify,
385	.verify_write = xfs_dir3_data_write_verify,
386	.verify_struct = xfs_dir3_data_verify,
387};
388
389static const struct xfs_buf_ops xfs_dir3_data_reada_buf_ops = {
390	.name = "xfs_dir3_data_reada",
391	.magic = { cpu_to_be32(XFS_DIR2_DATA_MAGIC),
392		   cpu_to_be32(XFS_DIR3_DATA_MAGIC) },
393	.verify_read = xfs_dir3_data_reada_verify,
394	.verify_write = xfs_dir3_data_write_verify,
395};
396
397static xfs_failaddr_t
398xfs_dir3_data_header_check(
399	struct xfs_inode	*dp,
400	struct xfs_buf		*bp)
401{
402	struct xfs_mount	*mp = dp->i_mount;
403
404	if (xfs_has_crc(mp)) {
405		struct xfs_dir3_data_hdr *hdr3 = bp->b_addr;
406
407		if (be64_to_cpu(hdr3->hdr.owner) != dp->i_ino)
408			return __this_address;
409	}
410
411	return NULL;
412}
413
414int
415xfs_dir3_data_read(
416	struct xfs_trans	*tp,
417	struct xfs_inode	*dp,
418	xfs_dablk_t		bno,
419	unsigned int		flags,
420	struct xfs_buf		**bpp)
421{
422	xfs_failaddr_t		fa;
423	int			err;
424
425	err = xfs_da_read_buf(tp, dp, bno, flags, bpp, XFS_DATA_FORK,
426			&xfs_dir3_data_buf_ops);
427	if (err || !*bpp)
428		return err;
429
430	/* Check things that we can't do in the verifier. */
431	fa = xfs_dir3_data_header_check(dp, *bpp);
432	if (fa) {
433		__xfs_buf_mark_corrupt(*bpp, fa);
434		xfs_trans_brelse(tp, *bpp);
435		*bpp = NULL;
436		return -EFSCORRUPTED;
437	}
438
439	xfs_trans_buf_set_type(tp, *bpp, XFS_BLFT_DIR_DATA_BUF);
440	return err;
441}
442
443int
444xfs_dir3_data_readahead(
445	struct xfs_inode	*dp,
446	xfs_dablk_t		bno,
447	unsigned int		flags)
448{
449	return xfs_da_reada_buf(dp, bno, flags, XFS_DATA_FORK,
450				&xfs_dir3_data_reada_buf_ops);
451}
452
453/*
454 * Find the bestfree entry that exactly coincides with unused directory space
455 * or a verifier error because the bestfree data are bad.
456 */
457static xfs_failaddr_t
458xfs_dir2_data_freefind_verify(
459	struct xfs_dir2_data_hdr	*hdr,
460	struct xfs_dir2_data_free	*bf,
461	struct xfs_dir2_data_unused	*dup,
462	struct xfs_dir2_data_free	**bf_ent)
463{
464	struct xfs_dir2_data_free	*dfp;
465	xfs_dir2_data_aoff_t		off;
466	bool				matched = false;
467	bool				seenzero = false;
468
469	*bf_ent = NULL;
470	off = (xfs_dir2_data_aoff_t)((char *)dup - (char *)hdr);
471
472	/*
473	 * Validate some consistency in the bestfree table.
474	 * Check order, non-overlapping entries, and if we find the
475	 * one we're looking for it has to be exact.
476	 */
477	for (dfp = &bf[0]; dfp < &bf[XFS_DIR2_DATA_FD_COUNT]; dfp++) {
478		if (!dfp->offset) {
479			if (dfp->length)
480				return __this_address;
481			seenzero = true;
482			continue;
483		}
484		if (seenzero)
485			return __this_address;
486		if (be16_to_cpu(dfp->offset) == off) {
487			matched = true;
488			if (dfp->length != dup->length)
489				return __this_address;
490		} else if (be16_to_cpu(dfp->offset) > off) {
491			if (off + be16_to_cpu(dup->length) >
492					be16_to_cpu(dfp->offset))
493				return __this_address;
494		} else {
495			if (be16_to_cpu(dfp->offset) +
496					be16_to_cpu(dfp->length) > off)
497				return __this_address;
498		}
499		if (!matched &&
500		    be16_to_cpu(dfp->length) < be16_to_cpu(dup->length))
501			return __this_address;
502		if (dfp > &bf[0] &&
503		    be16_to_cpu(dfp[-1].length) < be16_to_cpu(dfp[0].length))
504			return __this_address;
505	}
506
507	/* Looks ok so far; now try to match up with a bestfree entry. */
508	*bf_ent = xfs_dir2_data_freefind(hdr, bf, dup);
509	return NULL;
510}
511
512/*
513 * Given a data block and an unused entry from that block,
514 * return the bestfree entry if any that corresponds to it.
515 */
516xfs_dir2_data_free_t *
517xfs_dir2_data_freefind(
518	struct xfs_dir2_data_hdr *hdr,		/* data block header */
519	struct xfs_dir2_data_free *bf,		/* bestfree table pointer */
520	struct xfs_dir2_data_unused *dup)	/* unused space */
521{
522	xfs_dir2_data_free_t	*dfp;		/* bestfree entry */
523	xfs_dir2_data_aoff_t	off;		/* offset value needed */
524
525	off = (xfs_dir2_data_aoff_t)((char *)dup - (char *)hdr);
526
527	/*
528	 * If this is smaller than the smallest bestfree entry,
529	 * it can't be there since they're sorted.
530	 */
531	if (be16_to_cpu(dup->length) <
532	    be16_to_cpu(bf[XFS_DIR2_DATA_FD_COUNT - 1].length))
533		return NULL;
534	/*
535	 * Look at the three bestfree entries for our guy.
536	 */
537	for (dfp = &bf[0]; dfp < &bf[XFS_DIR2_DATA_FD_COUNT]; dfp++) {
538		if (!dfp->offset)
539			return NULL;
540		if (be16_to_cpu(dfp->offset) == off)
541			return dfp;
542	}
543	/*
544	 * Didn't find it.  This only happens if there are duplicate lengths.
545	 */
546	return NULL;
547}
548
549/*
550 * Insert an unused-space entry into the bestfree table.
551 */
552xfs_dir2_data_free_t *				/* entry inserted */
553xfs_dir2_data_freeinsert(
554	struct xfs_dir2_data_hdr *hdr,		/* data block pointer */
555	struct xfs_dir2_data_free *dfp,		/* bestfree table pointer */
556	struct xfs_dir2_data_unused *dup,	/* unused space */
557	int			*loghead)	/* log the data header (out) */
558{
559	xfs_dir2_data_free_t	new;		/* new bestfree entry */
560
561	ASSERT(hdr->magic == cpu_to_be32(XFS_DIR2_DATA_MAGIC) ||
562	       hdr->magic == cpu_to_be32(XFS_DIR2_BLOCK_MAGIC) ||
563	       hdr->magic == cpu_to_be32(XFS_DIR3_DATA_MAGIC) ||
564	       hdr->magic == cpu_to_be32(XFS_DIR3_BLOCK_MAGIC));
565
566	new.length = dup->length;
567	new.offset = cpu_to_be16((char *)dup - (char *)hdr);
568
569	/*
570	 * Insert at position 0, 1, or 2; or not at all.
571	 */
572	if (be16_to_cpu(new.length) > be16_to_cpu(dfp[0].length)) {
573		dfp[2] = dfp[1];
574		dfp[1] = dfp[0];
575		dfp[0] = new;
576		*loghead = 1;
577		return &dfp[0];
578	}
579	if (be16_to_cpu(new.length) > be16_to_cpu(dfp[1].length)) {
580		dfp[2] = dfp[1];
581		dfp[1] = new;
582		*loghead = 1;
583		return &dfp[1];
584	}
585	if (be16_to_cpu(new.length) > be16_to_cpu(dfp[2].length)) {
586		dfp[2] = new;
587		*loghead = 1;
588		return &dfp[2];
589	}
590	return NULL;
591}
592
593/*
594 * Remove a bestfree entry from the table.
595 */
596STATIC void
597xfs_dir2_data_freeremove(
598	struct xfs_dir2_data_hdr *hdr,		/* data block header */
599	struct xfs_dir2_data_free *bf,		/* bestfree table pointer */
600	struct xfs_dir2_data_free *dfp,		/* bestfree entry pointer */
601	int			*loghead)	/* out: log data header */
602{
603
604	ASSERT(hdr->magic == cpu_to_be32(XFS_DIR2_DATA_MAGIC) ||
605	       hdr->magic == cpu_to_be32(XFS_DIR2_BLOCK_MAGIC) ||
606	       hdr->magic == cpu_to_be32(XFS_DIR3_DATA_MAGIC) ||
607	       hdr->magic == cpu_to_be32(XFS_DIR3_BLOCK_MAGIC));
608
609	/*
610	 * It's the first entry, slide the next 2 up.
611	 */
612	if (dfp == &bf[0]) {
613		bf[0] = bf[1];
614		bf[1] = bf[2];
615	}
616	/*
617	 * It's the second entry, slide the 3rd entry up.
618	 */
619	else if (dfp == &bf[1])
620		bf[1] = bf[2];
621	/*
622	 * Must be the last entry.
623	 */
624	else
625		ASSERT(dfp == &bf[2]);
626	/*
627	 * Clear the 3rd entry, must be zero now.
628	 */
629	bf[2].length = 0;
630	bf[2].offset = 0;
631	*loghead = 1;
632}
633
634/*
635 * Given a data block, reconstruct its bestfree map.
636 */
637void
638xfs_dir2_data_freescan(
639	struct xfs_mount		*mp,
640	struct xfs_dir2_data_hdr	*hdr,
641	int				*loghead)
642{
643	struct xfs_da_geometry		*geo = mp->m_dir_geo;
644	struct xfs_dir2_data_free	*bf = xfs_dir2_data_bestfree_p(mp, hdr);
645	void				*addr = hdr;
646	unsigned int			offset = geo->data_entry_offset;
647	unsigned int			end;
648
649	ASSERT(hdr->magic == cpu_to_be32(XFS_DIR2_DATA_MAGIC) ||
650	       hdr->magic == cpu_to_be32(XFS_DIR3_DATA_MAGIC) ||
651	       hdr->magic == cpu_to_be32(XFS_DIR2_BLOCK_MAGIC) ||
652	       hdr->magic == cpu_to_be32(XFS_DIR3_BLOCK_MAGIC));
653
654	/*
655	 * Start by clearing the table.
656	 */
657	memset(bf, 0, sizeof(*bf) * XFS_DIR2_DATA_FD_COUNT);
658	*loghead = 1;
659
660	end = xfs_dir3_data_end_offset(geo, addr);
661	while (offset < end) {
662		struct xfs_dir2_data_unused	*dup = addr + offset;
663		struct xfs_dir2_data_entry	*dep = addr + offset;
664
665		/*
666		 * If it's a free entry, insert it.
667		 */
668		if (be16_to_cpu(dup->freetag) == XFS_DIR2_DATA_FREE_TAG) {
669			ASSERT(offset ==
670			       be16_to_cpu(*xfs_dir2_data_unused_tag_p(dup)));
671			xfs_dir2_data_freeinsert(hdr, bf, dup, loghead);
672			offset += be16_to_cpu(dup->length);
673			continue;
674		}
675
676		/*
677		 * For active entries, check their tags and skip them.
678		 */
679		ASSERT(offset ==
680		       be16_to_cpu(*xfs_dir2_data_entry_tag_p(mp, dep)));
681		offset += xfs_dir2_data_entsize(mp, dep->namelen);
682	}
683}
684
685/*
686 * Initialize a data block at the given block number in the directory.
687 * Give back the buffer for the created block.
688 */
689int						/* error */
690xfs_dir3_data_init(
691	struct xfs_da_args		*args,	/* directory operation args */
692	xfs_dir2_db_t			blkno,	/* logical dir block number */
693	struct xfs_buf			**bpp)	/* output block buffer */
694{
695	struct xfs_trans		*tp = args->trans;
696	struct xfs_inode		*dp = args->dp;
697	struct xfs_mount		*mp = dp->i_mount;
698	struct xfs_da_geometry		*geo = args->geo;
699	struct xfs_buf			*bp;
700	struct xfs_dir2_data_hdr	*hdr;
701	struct xfs_dir2_data_unused	*dup;
702	struct xfs_dir2_data_free 	*bf;
703	int				error;
704	int				i;
705
706	/*
707	 * Get the buffer set up for the block.
708	 */
709	error = xfs_da_get_buf(tp, dp, xfs_dir2_db_to_da(args->geo, blkno),
710			       &bp, XFS_DATA_FORK);
711	if (error)
712		return error;
713	bp->b_ops = &xfs_dir3_data_buf_ops;
714	xfs_trans_buf_set_type(tp, bp, XFS_BLFT_DIR_DATA_BUF);
715
716	/*
717	 * Initialize the header.
718	 */
719	hdr = bp->b_addr;
720	if (xfs_has_crc(mp)) {
721		struct xfs_dir3_blk_hdr *hdr3 = bp->b_addr;
722
723		memset(hdr3, 0, sizeof(*hdr3));
724		hdr3->magic = cpu_to_be32(XFS_DIR3_DATA_MAGIC);
725		hdr3->blkno = cpu_to_be64(xfs_buf_daddr(bp));
726		hdr3->owner = cpu_to_be64(dp->i_ino);
727		uuid_copy(&hdr3->uuid, &mp->m_sb.sb_meta_uuid);
728
729	} else
730		hdr->magic = cpu_to_be32(XFS_DIR2_DATA_MAGIC);
731
732	bf = xfs_dir2_data_bestfree_p(mp, hdr);
733	bf[0].offset = cpu_to_be16(geo->data_entry_offset);
734	bf[0].length = cpu_to_be16(geo->blksize - geo->data_entry_offset);
735	for (i = 1; i < XFS_DIR2_DATA_FD_COUNT; i++) {
736		bf[i].length = 0;
737		bf[i].offset = 0;
738	}
739
740	/*
741	 * Set up an unused entry for the block's body.
742	 */
743	dup = bp->b_addr + geo->data_entry_offset;
744	dup->freetag = cpu_to_be16(XFS_DIR2_DATA_FREE_TAG);
745	dup->length = bf[0].length;
746	*xfs_dir2_data_unused_tag_p(dup) = cpu_to_be16((char *)dup - (char *)hdr);
747
748	/*
749	 * Log it and return it.
750	 */
751	xfs_dir2_data_log_header(args, bp);
752	xfs_dir2_data_log_unused(args, bp, dup);
753	*bpp = bp;
754	return 0;
755}
756
757/*
758 * Log an active data entry from the block.
759 */
760void
761xfs_dir2_data_log_entry(
762	struct xfs_da_args	*args,
763	struct xfs_buf		*bp,
764	xfs_dir2_data_entry_t	*dep)		/* data entry pointer */
765{
766	struct xfs_mount	*mp = bp->b_mount;
767	struct xfs_dir2_data_hdr *hdr = bp->b_addr;
768
769	ASSERT(hdr->magic == cpu_to_be32(XFS_DIR2_DATA_MAGIC) ||
770	       hdr->magic == cpu_to_be32(XFS_DIR3_DATA_MAGIC) ||
771	       hdr->magic == cpu_to_be32(XFS_DIR2_BLOCK_MAGIC) ||
772	       hdr->magic == cpu_to_be32(XFS_DIR3_BLOCK_MAGIC));
773
774	xfs_trans_log_buf(args->trans, bp, (uint)((char *)dep - (char *)hdr),
775		(uint)((char *)(xfs_dir2_data_entry_tag_p(mp, dep) + 1) -
776		       (char *)hdr - 1));
777}
778
779/*
780 * Log a data block header.
781 */
782void
783xfs_dir2_data_log_header(
784	struct xfs_da_args	*args,
785	struct xfs_buf		*bp)
786{
787#ifdef DEBUG
788	struct xfs_dir2_data_hdr *hdr = bp->b_addr;
789
790	ASSERT(hdr->magic == cpu_to_be32(XFS_DIR2_DATA_MAGIC) ||
791	       hdr->magic == cpu_to_be32(XFS_DIR3_DATA_MAGIC) ||
792	       hdr->magic == cpu_to_be32(XFS_DIR2_BLOCK_MAGIC) ||
793	       hdr->magic == cpu_to_be32(XFS_DIR3_BLOCK_MAGIC));
794#endif
795
796	xfs_trans_log_buf(args->trans, bp, 0, args->geo->data_entry_offset - 1);
797}
798
799/*
800 * Log a data unused entry.
801 */
802void
803xfs_dir2_data_log_unused(
804	struct xfs_da_args	*args,
805	struct xfs_buf		*bp,
806	xfs_dir2_data_unused_t	*dup)		/* data unused pointer */
807{
808	xfs_dir2_data_hdr_t	*hdr = bp->b_addr;
809
810	ASSERT(hdr->magic == cpu_to_be32(XFS_DIR2_DATA_MAGIC) ||
811	       hdr->magic == cpu_to_be32(XFS_DIR3_DATA_MAGIC) ||
812	       hdr->magic == cpu_to_be32(XFS_DIR2_BLOCK_MAGIC) ||
813	       hdr->magic == cpu_to_be32(XFS_DIR3_BLOCK_MAGIC));
814
815	/*
816	 * Log the first part of the unused entry.
817	 */
818	xfs_trans_log_buf(args->trans, bp, (uint)((char *)dup - (char *)hdr),
819		(uint)((char *)&dup->length + sizeof(dup->length) -
820		       1 - (char *)hdr));
821	/*
822	 * Log the end (tag) of the unused entry.
823	 */
824	xfs_trans_log_buf(args->trans, bp,
825		(uint)((char *)xfs_dir2_data_unused_tag_p(dup) - (char *)hdr),
826		(uint)((char *)xfs_dir2_data_unused_tag_p(dup) - (char *)hdr +
827		       sizeof(xfs_dir2_data_off_t) - 1));
828}
829
/*
 * Make a byte range in the data block unused.
 * Its current contents are unimportant.
 *
 * The new free range is merged with the unused entry directly before and/or
 * directly after it, when there is one, so that free space stays coalesced.
 * The bestfree table is updated in place where the outcome is known;
 * otherwise a rescan is requested.
 *
 * needlogp is handed to the bestfree insert/remove helpers, which set it
 * when they modify the table.  *needscanp must be 0 on entry and is set on
 * return to say whether the caller has to regenerate the bestfree table.
 */
void
xfs_dir2_data_make_free(
	struct xfs_da_args	*args,
	struct xfs_buf		*bp,
	xfs_dir2_data_aoff_t	offset,		/* starting byte offset */
	xfs_dir2_data_aoff_t	len,		/* length in bytes */
	int			*needlogp,	/* out: log header */
	int			*needscanp)	/* out: regen bestfree */
{
	xfs_dir2_data_hdr_t	*hdr;		/* data block pointer */
	xfs_dir2_data_free_t	*dfp;		/* bestfree pointer */
	int			needscan;	/* need to regen bestfree */
	xfs_dir2_data_unused_t	*newdup;	/* new unused entry */
	xfs_dir2_data_unused_t	*postdup;	/* unused entry after us */
	xfs_dir2_data_unused_t	*prevdup;	/* unused entry before us */
	unsigned int		end;
	struct xfs_dir2_data_free *bf;

	hdr = bp->b_addr;

	/*
	 * Figure out where the end of the data area is.
	 */
	end = xfs_dir3_data_end_offset(args->geo, hdr);
	ASSERT(end != 0);

	/*
	 * If this isn't the start of the block, then back up to
	 * the previous entry and see if it's free.
	 */
	if (offset > args->geo->data_entry_offset) {
		__be16			*tagp;	/* tag just before us */

		/*
		 * The tag word that ends the previous record holds that
		 * record's starting offset.
		 */
		tagp = (__be16 *)((char *)hdr + offset) - 1;
		prevdup = (xfs_dir2_data_unused_t *)((char *)hdr + be16_to_cpu(*tagp));
		if (be16_to_cpu(prevdup->freetag) != XFS_DIR2_DATA_FREE_TAG)
			prevdup = NULL;
	} else
		prevdup = NULL;
	/*
	 * If this isn't the end of the block, see if the entry after
	 * us is free.
	 */
	if (offset + len < end) {
		postdup =
			(xfs_dir2_data_unused_t *)((char *)hdr + offset + len);
		if (be16_to_cpu(postdup->freetag) != XFS_DIR2_DATA_FREE_TAG)
			postdup = NULL;
	} else
		postdup = NULL;
	ASSERT(*needscanp == 0);
	needscan = 0;
	/*
	 * Previous and following entries are both free,
	 * merge everything into a single free entry.
	 */
	bf = xfs_dir2_data_bestfree_p(args->dp->i_mount, hdr);
	if (prevdup && postdup) {
		xfs_dir2_data_free_t	*dfp2;	/* another bestfree pointer */

		/*
		 * See if prevdup and/or postdup are in bestfree table.
		 */
		dfp = xfs_dir2_data_freefind(hdr, bf, prevdup);
		dfp2 = xfs_dir2_data_freefind(hdr, bf, postdup);
		/*
		 * We need a rescan unless there are exactly 2 free entries
		 * namely our two.  Then we know what's happening, otherwise
		 * since the third bestfree is there, there might be more
		 * entries.
		 */
		needscan = (bf[2].length != 0);
		/*
		 * Fix up the new big freespace.
		 */
		be16_add_cpu(&prevdup->length, len + be16_to_cpu(postdup->length));
		*xfs_dir2_data_unused_tag_p(prevdup) =
			cpu_to_be16((char *)prevdup - (char *)hdr);
		xfs_dir2_data_log_unused(args, bp, prevdup);
		if (!needscan) {
			/*
			 * Has to be the case that entries 0 and 1 are
			 * dfp and dfp2 (don't know which is which), and
			 * entry 2 is empty.
			 * Remove entry 1 first then entry 0.
			 */
			ASSERT(dfp && dfp2);
			if (dfp == &bf[1]) {
				dfp = &bf[0];
				ASSERT(dfp2 == dfp);
				dfp2 = &bf[1];
			}
			xfs_dir2_data_freeremove(hdr, bf, dfp2, needlogp);
			xfs_dir2_data_freeremove(hdr, bf, dfp, needlogp);
			/*
			 * Now insert the new entry.
			 */
			dfp = xfs_dir2_data_freeinsert(hdr, bf, prevdup,
						       needlogp);
			ASSERT(dfp == &bf[0]);
			ASSERT(dfp->length == prevdup->length);
			ASSERT(!dfp[1].length);
			ASSERT(!dfp[2].length);
		}
	}
	/*
	 * The entry before us is free, merge with it.
	 */
	else if (prevdup) {
		/* Look it up before its length is changed below. */
		dfp = xfs_dir2_data_freefind(hdr, bf, prevdup);
		be16_add_cpu(&prevdup->length, len);
		*xfs_dir2_data_unused_tag_p(prevdup) =
			cpu_to_be16((char *)prevdup - (char *)hdr);
		xfs_dir2_data_log_unused(args, bp, prevdup);
		/*
		 * If the previous entry was in the table, the new entry
		 * is longer, so it will be in the table too.  Remove
		 * the old one and add the new one.
		 */
		if (dfp) {
			xfs_dir2_data_freeremove(hdr, bf, dfp, needlogp);
			xfs_dir2_data_freeinsert(hdr, bf, prevdup, needlogp);
		}
		/*
		 * Otherwise we need a scan if the new entry is big enough.
		 */
		else {
			needscan = be16_to_cpu(prevdup->length) >
				   be16_to_cpu(bf[2].length);
		}
	}
	/*
	 * The following entry is free, merge with it.
	 */
	else if (postdup) {
		dfp = xfs_dir2_data_freefind(hdr, bf, postdup);
		/* The merged entry starts at our offset, not at postdup. */
		newdup = (xfs_dir2_data_unused_t *)((char *)hdr + offset);
		newdup->freetag = cpu_to_be16(XFS_DIR2_DATA_FREE_TAG);
		newdup->length = cpu_to_be16(len + be16_to_cpu(postdup->length));
		*xfs_dir2_data_unused_tag_p(newdup) =
			cpu_to_be16((char *)newdup - (char *)hdr);
		xfs_dir2_data_log_unused(args, bp, newdup);
		/*
		 * If the following entry was in the table, the new entry
		 * is longer, so it will be in the table too.  Remove
		 * the old one and add the new one.
		 */
		if (dfp) {
			xfs_dir2_data_freeremove(hdr, bf, dfp, needlogp);
			xfs_dir2_data_freeinsert(hdr, bf, newdup, needlogp);
		}
		/*
		 * Otherwise we need a scan if the new entry is big enough.
		 */
		else {
			needscan = be16_to_cpu(newdup->length) >
				   be16_to_cpu(bf[2].length);
		}
	}
	/*
	 * Neither neighbor is free.  Make a new entry.
	 */
	else {
		newdup = (xfs_dir2_data_unused_t *)((char *)hdr + offset);
		newdup->freetag = cpu_to_be16(XFS_DIR2_DATA_FREE_TAG);
		newdup->length = cpu_to_be16(len);
		*xfs_dir2_data_unused_tag_p(newdup) =
			cpu_to_be16((char *)newdup - (char *)hdr);
		xfs_dir2_data_log_unused(args, bp, newdup);
		xfs_dir2_data_freeinsert(hdr, bf, newdup, needlogp);
	}
	*needscanp = needscan;
}
1007
1008/* Check our free data for obvious signs of corruption. */
1009static inline xfs_failaddr_t
1010xfs_dir2_data_check_free(
1011	struct xfs_dir2_data_hdr	*hdr,
1012	struct xfs_dir2_data_unused	*dup,
1013	xfs_dir2_data_aoff_t		offset,
1014	xfs_dir2_data_aoff_t		len)
1015{
1016	if (hdr->magic != cpu_to_be32(XFS_DIR2_DATA_MAGIC) &&
1017	    hdr->magic != cpu_to_be32(XFS_DIR3_DATA_MAGIC) &&
1018	    hdr->magic != cpu_to_be32(XFS_DIR2_BLOCK_MAGIC) &&
1019	    hdr->magic != cpu_to_be32(XFS_DIR3_BLOCK_MAGIC))
1020		return __this_address;
1021	if (be16_to_cpu(dup->freetag) != XFS_DIR2_DATA_FREE_TAG)
1022		return __this_address;
1023	if (offset < (char *)dup - (char *)hdr)
1024		return __this_address;
1025	if (offset + len > (char *)dup + be16_to_cpu(dup->length) - (char *)hdr)
1026		return __this_address;
1027	if ((char *)dup - (char *)hdr !=
1028			be16_to_cpu(*xfs_dir2_data_unused_tag_p(dup)))
1029		return __this_address;
1030	return NULL;
1031}
1032
1033/* Sanity-check a new bestfree entry. */
1034static inline xfs_failaddr_t
1035xfs_dir2_data_check_new_free(
1036	struct xfs_dir2_data_hdr	*hdr,
1037	struct xfs_dir2_data_free	*dfp,
1038	struct xfs_dir2_data_unused	*newdup)
1039{
1040	if (dfp == NULL)
1041		return __this_address;
1042	if (dfp->length != newdup->length)
1043		return __this_address;
1044	if (be16_to_cpu(dfp->offset) != (char *)newdup - (char *)hdr)
1045		return __this_address;
1046	return NULL;
1047}
1048
1049/*
1050 * Take a byte range out of an existing unused space and make it un-free.
1051 */
1052int
1053xfs_dir2_data_use_free(
1054	struct xfs_da_args	*args,
1055	struct xfs_buf		*bp,
1056	xfs_dir2_data_unused_t	*dup,		/* unused entry */
1057	xfs_dir2_data_aoff_t	offset,		/* starting offset to use */
1058	xfs_dir2_data_aoff_t	len,		/* length to use */
1059	int			*needlogp,	/* out: need to log header */
1060	int			*needscanp)	/* out: need regen bestfree */
1061{
1062	xfs_dir2_data_hdr_t	*hdr;		/* data block header */
1063	xfs_dir2_data_free_t	*dfp;		/* bestfree pointer */
1064	xfs_dir2_data_unused_t	*newdup;	/* new unused entry */
1065	xfs_dir2_data_unused_t	*newdup2;	/* another new unused entry */
1066	struct xfs_dir2_data_free *bf;
1067	xfs_failaddr_t		fa;
1068	int			matchback;	/* matches end of freespace */
1069	int			matchfront;	/* matches start of freespace */
1070	int			needscan;	/* need to regen bestfree */
1071	int			oldlen;		/* old unused entry's length */
1072
1073	hdr = bp->b_addr;
1074	fa = xfs_dir2_data_check_free(hdr, dup, offset, len);
1075	if (fa)
1076		goto corrupt;
1077	/*
1078	 * Look up the entry in the bestfree table.
1079	 */
1080	oldlen = be16_to_cpu(dup->length);
1081	bf = xfs_dir2_data_bestfree_p(args->dp->i_mount, hdr);
1082	dfp = xfs_dir2_data_freefind(hdr, bf, dup);
1083	ASSERT(dfp || oldlen <= be16_to_cpu(bf[2].length));
1084	/*
1085	 * Check for alignment with front and back of the entry.
1086	 */
1087	matchfront = (char *)dup - (char *)hdr == offset;
1088	matchback = (char *)dup + oldlen - (char *)hdr == offset + len;
1089	ASSERT(*needscanp == 0);
1090	needscan = 0;
1091	/*
1092	 * If we matched it exactly we just need to get rid of it from
1093	 * the bestfree table.
1094	 */
1095	if (matchfront && matchback) {
1096		if (dfp) {
1097			needscan = (bf[2].offset != 0);
1098			if (!needscan)
1099				xfs_dir2_data_freeremove(hdr, bf, dfp,
1100							 needlogp);
1101		}
1102	}
1103	/*
1104	 * We match the first part of the entry.
1105	 * Make a new entry with the remaining freespace.
1106	 */
1107	else if (matchfront) {
1108		newdup = (xfs_dir2_data_unused_t *)((char *)hdr + offset + len);
1109		newdup->freetag = cpu_to_be16(XFS_DIR2_DATA_FREE_TAG);
1110		newdup->length = cpu_to_be16(oldlen - len);
1111		*xfs_dir2_data_unused_tag_p(newdup) =
1112			cpu_to_be16((char *)newdup - (char *)hdr);
1113		xfs_dir2_data_log_unused(args, bp, newdup);
1114		/*
1115		 * If it was in the table, remove it and add the new one.
1116		 */
1117		if (dfp) {
1118			xfs_dir2_data_freeremove(hdr, bf, dfp, needlogp);
1119			dfp = xfs_dir2_data_freeinsert(hdr, bf, newdup,
1120						       needlogp);
1121			fa = xfs_dir2_data_check_new_free(hdr, dfp, newdup);
1122			if (fa)
1123				goto corrupt;
1124			/*
1125			 * If we got inserted at the last slot,
1126			 * that means we don't know if there was a better
1127			 * choice for the last slot, or not.  Rescan.
1128			 */
1129			needscan = dfp == &bf[2];
1130		}
1131	}
1132	/*
1133	 * We match the last part of the entry.
1134	 * Trim the allocated space off the tail of the entry.
1135	 */
1136	else if (matchback) {
1137		newdup = dup;
1138		newdup->length = cpu_to_be16(((char *)hdr + offset) - (char *)newdup);
1139		*xfs_dir2_data_unused_tag_p(newdup) =
1140			cpu_to_be16((char *)newdup - (char *)hdr);
1141		xfs_dir2_data_log_unused(args, bp, newdup);
1142		/*
1143		 * If it was in the table, remove it and add the new one.
1144		 */
1145		if (dfp) {
1146			xfs_dir2_data_freeremove(hdr, bf, dfp, needlogp);
1147			dfp = xfs_dir2_data_freeinsert(hdr, bf, newdup,
1148						       needlogp);
1149			fa = xfs_dir2_data_check_new_free(hdr, dfp, newdup);
1150			if (fa)
1151				goto corrupt;
1152			/*
1153			 * If we got inserted at the last slot,
1154			 * that means we don't know if there was a better
1155			 * choice for the last slot, or not.  Rescan.
1156			 */
1157			needscan = dfp == &bf[2];
1158		}
1159	}
1160	/*
1161	 * Poking out the middle of an entry.
1162	 * Make two new entries.
1163	 */
1164	else {
1165		newdup = dup;
1166		newdup->length = cpu_to_be16(((char *)hdr + offset) - (char *)newdup);
1167		*xfs_dir2_data_unused_tag_p(newdup) =
1168			cpu_to_be16((char *)newdup - (char *)hdr);
1169		xfs_dir2_data_log_unused(args, bp, newdup);
1170		newdup2 = (xfs_dir2_data_unused_t *)((char *)hdr + offset + len);
1171		newdup2->freetag = cpu_to_be16(XFS_DIR2_DATA_FREE_TAG);
1172		newdup2->length = cpu_to_be16(oldlen - len - be16_to_cpu(newdup->length));
1173		*xfs_dir2_data_unused_tag_p(newdup2) =
1174			cpu_to_be16((char *)newdup2 - (char *)hdr);
1175		xfs_dir2_data_log_unused(args, bp, newdup2);
1176		/*
1177		 * If the old entry was in the table, we need to scan
1178		 * if the 3rd entry was valid, since these entries
1179		 * are smaller than the old one.
1180		 * If we don't need to scan that means there were 1 or 2
1181		 * entries in the table, and removing the old and adding
1182		 * the 2 new will work.
1183		 */
1184		if (dfp) {
1185			needscan = (bf[2].length != 0);
1186			if (!needscan) {
1187				xfs_dir2_data_freeremove(hdr, bf, dfp,
1188							 needlogp);
1189				xfs_dir2_data_freeinsert(hdr, bf, newdup,
1190							 needlogp);
1191				xfs_dir2_data_freeinsert(hdr, bf, newdup2,
1192							 needlogp);
1193			}
1194		}
1195	}
1196	*needscanp = needscan;
1197	return 0;
1198corrupt:
1199	xfs_corruption_error(__func__, XFS_ERRLEVEL_LOW, args->dp->i_mount,
1200			hdr, sizeof(*hdr), __FILE__, __LINE__, fa);
1201	return -EFSCORRUPTED;
1202}
1203
1204/* Find the end of the entry data in a data/block format dir block. */
1205unsigned int
1206xfs_dir3_data_end_offset(
1207	struct xfs_da_geometry		*geo,
1208	struct xfs_dir2_data_hdr	*hdr)
1209{
1210	void				*p;
1211
1212	switch (hdr->magic) {
1213	case cpu_to_be32(XFS_DIR3_BLOCK_MAGIC):
1214	case cpu_to_be32(XFS_DIR2_BLOCK_MAGIC):
1215		p = xfs_dir2_block_leaf_p(xfs_dir2_block_tail_p(geo, hdr));
1216		return p - (void *)hdr;
1217	case cpu_to_be32(XFS_DIR3_DATA_MAGIC):
1218	case cpu_to_be32(XFS_DIR2_DATA_MAGIC):
1219		return geo->blksize;
1220	default:
1221		return 0;
1222	}
1223}
1224