xref: /kernel/linux/linux-5.10/fs/ext4/xattr.c (revision 8c2ecf20)
1// SPDX-License-Identifier: GPL-2.0
2/*
3 * linux/fs/ext4/xattr.c
4 *
5 * Copyright (C) 2001-2003 Andreas Gruenbacher, <agruen@suse.de>
6 *
7 * Fix by Harrison Xing <harrison@mountainviewdata.com>.
8 * Ext4 code with a lot of help from Eric Jarman <ejarman@acm.org>.
9 * Extended attributes for symlinks and special files added per
10 *  suggestion of Luka Renko <luka.renko@hermes.si>.
11 * xattr consolidation Copyright (c) 2004 James Morris <jmorris@redhat.com>,
12 *  Red Hat Inc.
13 * ea-in-inode support by Alex Tomas <alex@clusterfs.com> aka bzzz
14 *  and Andreas Gruenbacher <agruen@suse.de>.
15 */
16
17/*
18 * Extended attributes are stored directly in inodes (on file systems with
19 * inodes bigger than 128 bytes) and on additional disk blocks. The i_file_acl
20 * field contains the block number if an inode uses an additional block. All
21 * attributes must fit in the inode and one additional block. Blocks that
22 * contain the identical set of attributes may be shared among several inodes.
23 * Identical blocks are detected by keeping a cache of blocks that have
24 * recently been accessed.
25 *
26 * The attributes in inodes and on blocks have a different header; the entries
27 * are stored in the same format:
28 *
29 *   +------------------+
30 *   | header           |
31 *   | entry 1          | |
32 *   | entry 2          | | growing downwards
33 *   | entry 3          | v
34 *   | four null bytes  |
35 *   | . . .            |
36 *   | value 1          | ^
37 *   | value 3          | | growing upwards
38 *   | value 2          | |
39 *   +------------------+
40 *
41 * The header is followed by multiple entry descriptors. In disk blocks, the
42 * entry descriptors are kept sorted. In inodes, they are unsorted. The
43 * attribute values are aligned to the end of the block in no specific order.
44 *
45 * Locking strategy
46 * ----------------
47 * EXT4_I(inode)->i_file_acl is protected by EXT4_I(inode)->xattr_sem.
48 * EA blocks are only changed if they are exclusive to an inode, so
49 * holding xattr_sem also means that nothing but the EA block's reference
50 * count can change. Multiple writers to the same block are synchronized
51 * by the buffer lock.
52 */
53
54#include <linux/init.h>
55#include <linux/fs.h>
56#include <linux/slab.h>
57#include <linux/mbcache.h>
58#include <linux/quotaops.h>
59#include <linux/iversion.h>
60#include "ext4_jbd2.h"
61#include "ext4.h"
62#include "xattr.h"
63#include "acl.h"
64
/*
 * Debug tracing helpers.  When EXT4_XATTR_DEBUG is defined they emit a
 * KERN_DEBUG message prefixed with the inode (s_id:i_ino) or the buffer
 * (b_bdev:b_blocknr) being worked on; otherwise they expand to no_printk().
 */
#ifndef EXT4_XATTR_DEBUG
# define ea_idebug(inode, fmt, ...)	no_printk(fmt, ##__VA_ARGS__)
# define ea_bdebug(bh, fmt, ...)	no_printk(fmt, ##__VA_ARGS__)
#else
# define ea_idebug(inode, fmt, ...)					\
	printk(KERN_DEBUG "inode %s:%lu: " fmt "\n",			\
	       inode->i_sb->s_id, inode->i_ino, ##__VA_ARGS__)
# define ea_bdebug(bh, fmt, ...)					\
	printk(KERN_DEBUG "block %pg:%lu: " fmt "\n",			\
	       bh->b_bdev, (unsigned long)bh->b_blocknr, ##__VA_ARGS__)
#endif
76
77static void ext4_xattr_block_cache_insert(struct mb_cache *,
78					  struct buffer_head *);
79static struct buffer_head *
80ext4_xattr_block_cache_find(struct inode *, struct ext4_xattr_header *,
81			    struct mb_cache_entry **);
82static __le32 ext4_xattr_hash_entry(char *name, size_t name_len, __le32 *value,
83				    size_t value_count);
84static void ext4_xattr_rehash(struct ext4_xattr_header *);
85
86static const struct xattr_handler * const ext4_xattr_handler_map[] = {
87	[EXT4_XATTR_INDEX_USER]		     = &ext4_xattr_user_handler,
88#ifdef CONFIG_EXT4_FS_POSIX_ACL
89	[EXT4_XATTR_INDEX_POSIX_ACL_ACCESS]  = &posix_acl_access_xattr_handler,
90	[EXT4_XATTR_INDEX_POSIX_ACL_DEFAULT] = &posix_acl_default_xattr_handler,
91#endif
92	[EXT4_XATTR_INDEX_TRUSTED]	     = &ext4_xattr_trusted_handler,
93#ifdef CONFIG_EXT4_FS_SECURITY
94	[EXT4_XATTR_INDEX_SECURITY]	     = &ext4_xattr_security_handler,
95#endif
96	[EXT4_XATTR_INDEX_HURD]		     = &ext4_xattr_hurd_handler,
97};
98
99const struct xattr_handler *ext4_xattr_handlers[] = {
100	&ext4_xattr_user_handler,
101	&ext4_xattr_trusted_handler,
102#ifdef CONFIG_EXT4_FS_POSIX_ACL
103	&posix_acl_access_xattr_handler,
104	&posix_acl_default_xattr_handler,
105#endif
106#ifdef CONFIG_EXT4_FS_SECURITY
107	&ext4_xattr_security_handler,
108#endif
109	&ext4_xattr_hurd_handler,
110	NULL
111};
112
/* mbcache used for xattr blocks, looked up through the inode's super block. */
#define EA_BLOCK_CACHE(inode)	(EXT4_SB((inode)->i_sb)->s_ea_block_cache)

/* mbcache used for EA inodes, looked up through the inode's super block. */
#define EA_INODE_CACHE(inode)	(EXT4_SB((inode)->i_sb)->s_ea_inode_cache)
118
/* Forward declaration of a static helper. */
static int ext4_expand_inode_array(
			struct ext4_xattr_inode_array **ea_inode_array,
			struct inode *inode);
122
#ifdef CONFIG_LOCKDEP
/*
 * Assign dedicated lockdep subclasses to the locks of an EA inode:
 * i_rwsem gets subclass 1 and i_data_sem gets I_DATA_SEM_EA.
 */
void ext4_xattr_inode_set_class(struct inode *ea_inode)
{
	struct ext4_inode_info *info = EXT4_I(ea_inode);

	lockdep_set_subclass(&ea_inode->i_rwsem, 1);
	(void) info;	/* shut up clang warning if !CONFIG_LOCKDEP */
	lockdep_set_subclass(&info->i_data_sem, I_DATA_SEM_EA);
}
#endif
133
134static __le32 ext4_xattr_block_csum(struct inode *inode,
135				    sector_t block_nr,
136				    struct ext4_xattr_header *hdr)
137{
138	struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb);
139	__u32 csum;
140	__le64 dsk_block_nr = cpu_to_le64(block_nr);
141	__u32 dummy_csum = 0;
142	int offset = offsetof(struct ext4_xattr_header, h_checksum);
143
144	csum = ext4_chksum(sbi, sbi->s_csum_seed, (__u8 *)&dsk_block_nr,
145			   sizeof(dsk_block_nr));
146	csum = ext4_chksum(sbi, csum, (__u8 *)hdr, offset);
147	csum = ext4_chksum(sbi, csum, (__u8 *)&dummy_csum, sizeof(dummy_csum));
148	offset += sizeof(dummy_csum);
149	csum = ext4_chksum(sbi, csum, (__u8 *)hdr + offset,
150			   EXT4_BLOCK_SIZE(inode->i_sb) - offset);
151
152	return cpu_to_le32(csum);
153}
154
155static int ext4_xattr_block_csum_verify(struct inode *inode,
156					struct buffer_head *bh)
157{
158	struct ext4_xattr_header *hdr = BHDR(bh);
159	int ret = 1;
160
161	if (ext4_has_metadata_csum(inode->i_sb)) {
162		lock_buffer(bh);
163		ret = (hdr->h_checksum == ext4_xattr_block_csum(inode,
164							bh->b_blocknr, hdr));
165		unlock_buffer(bh);
166	}
167	return ret;
168}
169
170static void ext4_xattr_block_csum_set(struct inode *inode,
171				      struct buffer_head *bh)
172{
173	if (ext4_has_metadata_csum(inode->i_sb))
174		BHDR(bh)->h_checksum = ext4_xattr_block_csum(inode,
175						bh->b_blocknr, BHDR(bh));
176}
177
178static inline const struct xattr_handler *
179ext4_xattr_handler(int name_index)
180{
181	const struct xattr_handler *handler = NULL;
182
183	if (name_index > 0 && name_index < ARRAY_SIZE(ext4_xattr_handler_map))
184		handler = ext4_xattr_handler_map[name_index];
185	return handler;
186}
187
188static int
189ext4_xattr_check_entries(struct ext4_xattr_entry *entry, void *end,
190			 void *value_start)
191{
192	struct ext4_xattr_entry *e = entry;
193
194	/* Find the end of the names list */
195	while (!IS_LAST_ENTRY(e)) {
196		struct ext4_xattr_entry *next = EXT4_XATTR_NEXT(e);
197		if ((void *)next >= end)
198			return -EFSCORRUPTED;
199		if (strnlen(e->e_name, e->e_name_len) != e->e_name_len)
200			return -EFSCORRUPTED;
201		e = next;
202	}
203
204	/* Check the values */
205	while (!IS_LAST_ENTRY(entry)) {
206		u32 size = le32_to_cpu(entry->e_value_size);
207
208		if (size > EXT4_XATTR_SIZE_MAX)
209			return -EFSCORRUPTED;
210
211		if (size != 0 && entry->e_value_inum == 0) {
212			u16 offs = le16_to_cpu(entry->e_value_offs);
213			void *value;
214
215			/*
216			 * The value cannot overlap the names, and the value
217			 * with padding cannot extend beyond 'end'.  Check both
218			 * the padded and unpadded sizes, since the size may
219			 * overflow to 0 when adding padding.
220			 */
221			if (offs > end - value_start)
222				return -EFSCORRUPTED;
223			value = value_start + offs;
224			if (value < (void *)e + sizeof(u32) ||
225			    size > end - value ||
226			    EXT4_XATTR_SIZE(size) > end - value)
227				return -EFSCORRUPTED;
228		}
229		entry = EXT4_XATTR_NEXT(entry);
230	}
231
232	return 0;
233}
234
235static inline int
236__ext4_xattr_check_block(struct inode *inode, struct buffer_head *bh,
237			 const char *function, unsigned int line)
238{
239	int error = -EFSCORRUPTED;
240
241	if (BHDR(bh)->h_magic != cpu_to_le32(EXT4_XATTR_MAGIC) ||
242	    BHDR(bh)->h_blocks != cpu_to_le32(1))
243		goto errout;
244	if (buffer_verified(bh))
245		return 0;
246
247	error = -EFSBADCRC;
248	if (!ext4_xattr_block_csum_verify(inode, bh))
249		goto errout;
250	error = ext4_xattr_check_entries(BFIRST(bh), bh->b_data + bh->b_size,
251					 bh->b_data);
252errout:
253	if (error)
254		__ext4_error_inode(inode, function, line, 0, -error,
255				   "corrupted xattr block %llu",
256				   (unsigned long long) bh->b_blocknr);
257	else
258		set_buffer_verified(bh);
259	return error;
260}
261
262#define ext4_xattr_check_block(inode, bh) \
263	__ext4_xattr_check_block((inode), (bh),  __func__, __LINE__)
264
265
266static int
267__xattr_check_inode(struct inode *inode, struct ext4_xattr_ibody_header *header,
268			 void *end, const char *function, unsigned int line)
269{
270	int error = -EFSCORRUPTED;
271
272	if (end - (void *)header < sizeof(*header) + sizeof(u32) ||
273	    (header->h_magic != cpu_to_le32(EXT4_XATTR_MAGIC)))
274		goto errout;
275	error = ext4_xattr_check_entries(IFIRST(header), end, IFIRST(header));
276errout:
277	if (error)
278		__ext4_error_inode(inode, function, line, 0, -error,
279				   "corrupted in-inode xattr");
280	return error;
281}
282
283#define xattr_check_inode(inode, header, end) \
284	__xattr_check_inode((inode), (header), (end), __func__, __LINE__)
285
286static int
287xattr_find_entry(struct inode *inode, struct ext4_xattr_entry **pentry,
288		 void *end, int name_index, const char *name, int sorted)
289{
290	struct ext4_xattr_entry *entry, *next;
291	size_t name_len;
292	int cmp = 1;
293
294	if (name == NULL)
295		return -EINVAL;
296	name_len = strlen(name);
297	for (entry = *pentry; !IS_LAST_ENTRY(entry); entry = next) {
298		next = EXT4_XATTR_NEXT(entry);
299		if ((void *) next >= end) {
300			EXT4_ERROR_INODE(inode, "corrupted xattr entries");
301			return -EFSCORRUPTED;
302		}
303		cmp = name_index - entry->e_name_index;
304		if (!cmp)
305			cmp = name_len - entry->e_name_len;
306		if (!cmp)
307			cmp = memcmp(name, entry->e_name, name_len);
308		if (cmp <= 0 && (sorted || cmp == 0))
309			break;
310	}
311	*pentry = entry;
312	return cmp ? -ENODATA : 0;
313}
314
315static u32
316ext4_xattr_inode_hash(struct ext4_sb_info *sbi, const void *buffer, size_t size)
317{
318	return ext4_chksum(sbi, sbi->s_csum_seed, buffer, size);
319}
320
321static u64 ext4_xattr_inode_get_ref(struct inode *ea_inode)
322{
323	return ((u64)ea_inode->i_ctime.tv_sec << 32) |
324		(u32) inode_peek_iversion_raw(ea_inode);
325}
326
327static void ext4_xattr_inode_set_ref(struct inode *ea_inode, u64 ref_count)
328{
329	ea_inode->i_ctime.tv_sec = (u32)(ref_count >> 32);
330	inode_set_iversion_raw(ea_inode, ref_count & 0xffffffff);
331}
332
333static u32 ext4_xattr_inode_get_hash(struct inode *ea_inode)
334{
335	return (u32)ea_inode->i_atime.tv_sec;
336}
337
338static void ext4_xattr_inode_set_hash(struct inode *ea_inode, u32 hash)
339{
340	ea_inode->i_atime.tv_sec = hash;
341}
342
343/*
344 * Read the EA value from an inode.
345 */
346static int ext4_xattr_inode_read(struct inode *ea_inode, void *buf, size_t size)
347{
348	int blocksize = 1 << ea_inode->i_blkbits;
349	int bh_count = (size + blocksize - 1) >> ea_inode->i_blkbits;
350	int tail_size = (size % blocksize) ?: blocksize;
351	struct buffer_head *bhs_inline[8];
352	struct buffer_head **bhs = bhs_inline;
353	int i, ret;
354
355	if (bh_count > ARRAY_SIZE(bhs_inline)) {
356		bhs = kmalloc_array(bh_count, sizeof(*bhs), GFP_NOFS);
357		if (!bhs)
358			return -ENOMEM;
359	}
360
361	ret = ext4_bread_batch(ea_inode, 0 /* block */, bh_count,
362			       true /* wait */, bhs);
363	if (ret)
364		goto free_bhs;
365
366	for (i = 0; i < bh_count; i++) {
367		/* There shouldn't be any holes in ea_inode. */
368		if (!bhs[i]) {
369			ret = -EFSCORRUPTED;
370			goto put_bhs;
371		}
372		memcpy((char *)buf + blocksize * i, bhs[i]->b_data,
373		       i < bh_count - 1 ? blocksize : tail_size);
374	}
375	ret = 0;
376put_bhs:
377	for (i = 0; i < bh_count; i++)
378		brelse(bhs[i]);
379free_bhs:
380	if (bhs != bhs_inline)
381		kfree(bhs);
382	return ret;
383}
384
385#define EXT4_XATTR_INODE_GET_PARENT(inode) ((__u32)(inode)->i_mtime.tv_sec)
386
387static int ext4_xattr_inode_iget(struct inode *parent, unsigned long ea_ino,
388				 u32 ea_inode_hash, struct inode **ea_inode)
389{
390	struct inode *inode;
391	int err;
392
393	/*
394	 * We have to check for this corruption early as otherwise
395	 * iget_locked() could wait indefinitely for the state of our
396	 * parent inode.
397	 */
398	if (parent->i_ino == ea_ino) {
399		ext4_error(parent->i_sb,
400			   "Parent and EA inode have the same ino %lu", ea_ino);
401		return -EFSCORRUPTED;
402	}
403
404	inode = ext4_iget(parent->i_sb, ea_ino, EXT4_IGET_EA_INODE);
405	if (IS_ERR(inode)) {
406		err = PTR_ERR(inode);
407		ext4_error(parent->i_sb,
408			   "error while reading EA inode %lu err=%d", ea_ino,
409			   err);
410		return err;
411	}
412	ext4_xattr_inode_set_class(inode);
413
414	/*
415	 * Check whether this is an old Lustre-style xattr inode. Lustre
416	 * implementation does not have hash validation, rather it has a
417	 * backpointer from ea_inode to the parent inode.
418	 */
419	if (ea_inode_hash != ext4_xattr_inode_get_hash(inode) &&
420	    EXT4_XATTR_INODE_GET_PARENT(inode) == parent->i_ino &&
421	    inode->i_generation == parent->i_generation) {
422		ext4_set_inode_state(inode, EXT4_STATE_LUSTRE_EA_INODE);
423		ext4_xattr_inode_set_ref(inode, 1);
424	} else {
425		inode_lock(inode);
426		inode->i_flags |= S_NOQUOTA;
427		inode_unlock(inode);
428	}
429
430	*ea_inode = inode;
431	return 0;
432}
433
434/* Remove entry from mbcache when EA inode is getting evicted */
435void ext4_evict_ea_inode(struct inode *inode)
436{
437	struct mb_cache_entry *oe;
438
439	if (!EA_INODE_CACHE(inode))
440		return;
441	/* Wait for entry to get unused so that we can remove it */
442	while ((oe = mb_cache_entry_delete_or_get(EA_INODE_CACHE(inode),
443			ext4_xattr_inode_get_hash(inode), inode->i_ino))) {
444		mb_cache_entry_wait_unused(oe);
445		mb_cache_entry_put(EA_INODE_CACHE(inode), oe);
446	}
447}
448
449static int
450ext4_xattr_inode_verify_hashes(struct inode *ea_inode,
451			       struct ext4_xattr_entry *entry, void *buffer,
452			       size_t size)
453{
454	u32 hash;
455
456	/* Verify stored hash matches calculated hash. */
457	hash = ext4_xattr_inode_hash(EXT4_SB(ea_inode->i_sb), buffer, size);
458	if (hash != ext4_xattr_inode_get_hash(ea_inode))
459		return -EFSCORRUPTED;
460
461	if (entry) {
462		__le32 e_hash, tmp_data;
463
464		/* Verify entry hash. */
465		tmp_data = cpu_to_le32(hash);
466		e_hash = ext4_xattr_hash_entry(entry->e_name, entry->e_name_len,
467					       &tmp_data, 1);
468		if (e_hash != entry->e_hash)
469			return -EFSCORRUPTED;
470	}
471	return 0;
472}
473
474/*
475 * Read xattr value from the EA inode.
476 */
477static int
478ext4_xattr_inode_get(struct inode *inode, struct ext4_xattr_entry *entry,
479		     void *buffer, size_t size)
480{
481	struct mb_cache *ea_inode_cache = EA_INODE_CACHE(inode);
482	struct inode *ea_inode;
483	int err;
484
485	err = ext4_xattr_inode_iget(inode, le32_to_cpu(entry->e_value_inum),
486				    le32_to_cpu(entry->e_hash), &ea_inode);
487	if (err) {
488		ea_inode = NULL;
489		goto out;
490	}
491
492	if (i_size_read(ea_inode) != size) {
493		ext4_warning_inode(ea_inode,
494				   "ea_inode file size=%llu entry size=%zu",
495				   i_size_read(ea_inode), size);
496		err = -EFSCORRUPTED;
497		goto out;
498	}
499
500	err = ext4_xattr_inode_read(ea_inode, buffer, size);
501	if (err)
502		goto out;
503
504	if (!ext4_test_inode_state(ea_inode, EXT4_STATE_LUSTRE_EA_INODE)) {
505		err = ext4_xattr_inode_verify_hashes(ea_inode, entry, buffer,
506						     size);
507		if (err) {
508			ext4_warning_inode(ea_inode,
509					   "EA inode hash validation failed");
510			goto out;
511		}
512
513		if (ea_inode_cache)
514			mb_cache_entry_create(ea_inode_cache, GFP_NOFS,
515					ext4_xattr_inode_get_hash(ea_inode),
516					ea_inode->i_ino, true /* reusable */);
517	}
518out:
519	iput(ea_inode);
520	return err;
521}
522
523static int
524ext4_xattr_block_get(struct inode *inode, int name_index, const char *name,
525		     void *buffer, size_t buffer_size)
526{
527	struct buffer_head *bh = NULL;
528	struct ext4_xattr_entry *entry;
529	size_t size;
530	void *end;
531	int error;
532	struct mb_cache *ea_block_cache = EA_BLOCK_CACHE(inode);
533
534	ea_idebug(inode, "name=%d.%s, buffer=%p, buffer_size=%ld",
535		  name_index, name, buffer, (long)buffer_size);
536
537	if (!EXT4_I(inode)->i_file_acl)
538		return -ENODATA;
539	ea_idebug(inode, "reading block %llu",
540		  (unsigned long long)EXT4_I(inode)->i_file_acl);
541	bh = ext4_sb_bread(inode->i_sb, EXT4_I(inode)->i_file_acl, REQ_PRIO);
542	if (IS_ERR(bh))
543		return PTR_ERR(bh);
544	ea_bdebug(bh, "b_count=%d, refcount=%d",
545		atomic_read(&(bh->b_count)), le32_to_cpu(BHDR(bh)->h_refcount));
546	error = ext4_xattr_check_block(inode, bh);
547	if (error)
548		goto cleanup;
549	ext4_xattr_block_cache_insert(ea_block_cache, bh);
550	entry = BFIRST(bh);
551	end = bh->b_data + bh->b_size;
552	error = xattr_find_entry(inode, &entry, end, name_index, name, 1);
553	if (error)
554		goto cleanup;
555	size = le32_to_cpu(entry->e_value_size);
556	error = -ERANGE;
557	if (unlikely(size > EXT4_XATTR_SIZE_MAX))
558		goto cleanup;
559	if (buffer) {
560		if (size > buffer_size)
561			goto cleanup;
562		if (entry->e_value_inum) {
563			error = ext4_xattr_inode_get(inode, entry, buffer,
564						     size);
565			if (error)
566				goto cleanup;
567		} else {
568			u16 offset = le16_to_cpu(entry->e_value_offs);
569			void *p = bh->b_data + offset;
570
571			if (unlikely(p + size > end))
572				goto cleanup;
573			memcpy(buffer, p, size);
574		}
575	}
576	error = size;
577
578cleanup:
579	brelse(bh);
580	return error;
581}
582
583int
584ext4_xattr_ibody_get(struct inode *inode, int name_index, const char *name,
585		     void *buffer, size_t buffer_size)
586{
587	struct ext4_xattr_ibody_header *header;
588	struct ext4_xattr_entry *entry;
589	struct ext4_inode *raw_inode;
590	struct ext4_iloc iloc;
591	size_t size;
592	void *end;
593	int error;
594
595	if (!ext4_test_inode_state(inode, EXT4_STATE_XATTR))
596		return -ENODATA;
597	error = ext4_get_inode_loc(inode, &iloc);
598	if (error)
599		return error;
600	raw_inode = ext4_raw_inode(&iloc);
601	header = IHDR(inode, raw_inode);
602	end = (void *)raw_inode + EXT4_SB(inode->i_sb)->s_inode_size;
603	error = xattr_check_inode(inode, header, end);
604	if (error)
605		goto cleanup;
606	entry = IFIRST(header);
607	error = xattr_find_entry(inode, &entry, end, name_index, name, 0);
608	if (error)
609		goto cleanup;
610	size = le32_to_cpu(entry->e_value_size);
611	error = -ERANGE;
612	if (unlikely(size > EXT4_XATTR_SIZE_MAX))
613		goto cleanup;
614	if (buffer) {
615		if (size > buffer_size)
616			goto cleanup;
617		if (entry->e_value_inum) {
618			error = ext4_xattr_inode_get(inode, entry, buffer,
619						     size);
620			if (error)
621				goto cleanup;
622		} else {
623			u16 offset = le16_to_cpu(entry->e_value_offs);
624			void *p = (void *)IFIRST(header) + offset;
625
626			if (unlikely(p + size > end))
627				goto cleanup;
628			memcpy(buffer, p, size);
629		}
630	}
631	error = size;
632
633cleanup:
634	brelse(iloc.bh);
635	return error;
636}
637
638/*
639 * ext4_xattr_get()
640 *
641 * Copy an extended attribute into the buffer
642 * provided, or compute the buffer size required.
643 * Buffer is NULL to compute the size of the buffer required.
644 *
645 * Returns a negative error number on failure, or the number of bytes
646 * used / required on success.
647 */
648int
649ext4_xattr_get(struct inode *inode, int name_index, const char *name,
650	       void *buffer, size_t buffer_size)
651{
652	int error;
653
654	if (unlikely(ext4_forced_shutdown(EXT4_SB(inode->i_sb))))
655		return -EIO;
656
657	if (strlen(name) > 255)
658		return -ERANGE;
659
660	down_read(&EXT4_I(inode)->xattr_sem);
661	error = ext4_xattr_ibody_get(inode, name_index, name, buffer,
662				     buffer_size);
663	if (error == -ENODATA)
664		error = ext4_xattr_block_get(inode, name_index, name, buffer,
665					     buffer_size);
666	up_read(&EXT4_I(inode)->xattr_sem);
667	return error;
668}
669
670static int
671ext4_xattr_list_entries(struct dentry *dentry, struct ext4_xattr_entry *entry,
672			char *buffer, size_t buffer_size)
673{
674	size_t rest = buffer_size;
675
676	for (; !IS_LAST_ENTRY(entry); entry = EXT4_XATTR_NEXT(entry)) {
677		const struct xattr_handler *handler =
678			ext4_xattr_handler(entry->e_name_index);
679
680		if (handler && (!handler->list || handler->list(dentry))) {
681			const char *prefix = handler->prefix ?: handler->name;
682			size_t prefix_len = strlen(prefix);
683			size_t size = prefix_len + entry->e_name_len + 1;
684
685			if (buffer) {
686				if (size > rest)
687					return -ERANGE;
688				memcpy(buffer, prefix, prefix_len);
689				buffer += prefix_len;
690				memcpy(buffer, entry->e_name, entry->e_name_len);
691				buffer += entry->e_name_len;
692				*buffer++ = 0;
693			}
694			rest -= size;
695		}
696	}
697	return buffer_size - rest;  /* total size */
698}
699
700static int
701ext4_xattr_block_list(struct dentry *dentry, char *buffer, size_t buffer_size)
702{
703	struct inode *inode = d_inode(dentry);
704	struct buffer_head *bh = NULL;
705	int error;
706
707	ea_idebug(inode, "buffer=%p, buffer_size=%ld",
708		  buffer, (long)buffer_size);
709
710	if (!EXT4_I(inode)->i_file_acl)
711		return 0;
712	ea_idebug(inode, "reading block %llu",
713		  (unsigned long long)EXT4_I(inode)->i_file_acl);
714	bh = ext4_sb_bread(inode->i_sb, EXT4_I(inode)->i_file_acl, REQ_PRIO);
715	if (IS_ERR(bh))
716		return PTR_ERR(bh);
717	ea_bdebug(bh, "b_count=%d, refcount=%d",
718		atomic_read(&(bh->b_count)), le32_to_cpu(BHDR(bh)->h_refcount));
719	error = ext4_xattr_check_block(inode, bh);
720	if (error)
721		goto cleanup;
722	ext4_xattr_block_cache_insert(EA_BLOCK_CACHE(inode), bh);
723	error = ext4_xattr_list_entries(dentry, BFIRST(bh), buffer,
724					buffer_size);
725cleanup:
726	brelse(bh);
727	return error;
728}
729
730static int
731ext4_xattr_ibody_list(struct dentry *dentry, char *buffer, size_t buffer_size)
732{
733	struct inode *inode = d_inode(dentry);
734	struct ext4_xattr_ibody_header *header;
735	struct ext4_inode *raw_inode;
736	struct ext4_iloc iloc;
737	void *end;
738	int error;
739
740	if (!ext4_test_inode_state(inode, EXT4_STATE_XATTR))
741		return 0;
742	error = ext4_get_inode_loc(inode, &iloc);
743	if (error)
744		return error;
745	raw_inode = ext4_raw_inode(&iloc);
746	header = IHDR(inode, raw_inode);
747	end = (void *)raw_inode + EXT4_SB(inode->i_sb)->s_inode_size;
748	error = xattr_check_inode(inode, header, end);
749	if (error)
750		goto cleanup;
751	error = ext4_xattr_list_entries(dentry, IFIRST(header),
752					buffer, buffer_size);
753
754cleanup:
755	brelse(iloc.bh);
756	return error;
757}
758
759/*
760 * Inode operation listxattr()
761 *
762 * d_inode(dentry)->i_rwsem: don't care
763 *
764 * Copy a list of attribute names into the buffer
765 * provided, or compute the buffer size required.
766 * Buffer is NULL to compute the size of the buffer required.
767 *
768 * Returns a negative error number on failure, or the number of bytes
769 * used / required on success.
770 */
771ssize_t
772ext4_listxattr(struct dentry *dentry, char *buffer, size_t buffer_size)
773{
774	int ret, ret2;
775
776	down_read(&EXT4_I(d_inode(dentry))->xattr_sem);
777	ret = ret2 = ext4_xattr_ibody_list(dentry, buffer, buffer_size);
778	if (ret < 0)
779		goto errout;
780	if (buffer) {
781		buffer += ret;
782		buffer_size -= ret;
783	}
784	ret = ext4_xattr_block_list(dentry, buffer, buffer_size);
785	if (ret < 0)
786		goto errout;
787	ret += ret2;
788errout:
789	up_read(&EXT4_I(d_inode(dentry))->xattr_sem);
790	return ret;
791}
792
793/*
794 * If the EXT4_FEATURE_COMPAT_EXT_ATTR feature of this file system is
795 * not set, set it.
796 */
797static void ext4_xattr_update_super_block(handle_t *handle,
798					  struct super_block *sb)
799{
800	if (ext4_has_feature_xattr(sb))
801		return;
802
803	BUFFER_TRACE(EXT4_SB(sb)->s_sbh, "get_write_access");
804	if (ext4_journal_get_write_access(handle, EXT4_SB(sb)->s_sbh) == 0) {
805		lock_buffer(EXT4_SB(sb)->s_sbh);
806		ext4_set_feature_xattr(sb);
807		ext4_superblock_csum_set(sb);
808		unlock_buffer(EXT4_SB(sb)->s_sbh);
809		ext4_handle_dirty_metadata(handle, NULL, EXT4_SB(sb)->s_sbh);
810	}
811}
812
813int ext4_get_inode_usage(struct inode *inode, qsize_t *usage)
814{
815	struct ext4_iloc iloc = { .bh = NULL };
816	struct buffer_head *bh = NULL;
817	struct ext4_inode *raw_inode;
818	struct ext4_xattr_ibody_header *header;
819	struct ext4_xattr_entry *entry;
820	qsize_t ea_inode_refs = 0;
821	void *end;
822	int ret;
823
824	lockdep_assert_held_read(&EXT4_I(inode)->xattr_sem);
825
826	if (ext4_test_inode_state(inode, EXT4_STATE_XATTR)) {
827		ret = ext4_get_inode_loc(inode, &iloc);
828		if (ret)
829			goto out;
830		raw_inode = ext4_raw_inode(&iloc);
831		header = IHDR(inode, raw_inode);
832		end = (void *)raw_inode + EXT4_SB(inode->i_sb)->s_inode_size;
833		ret = xattr_check_inode(inode, header, end);
834		if (ret)
835			goto out;
836
837		for (entry = IFIRST(header); !IS_LAST_ENTRY(entry);
838		     entry = EXT4_XATTR_NEXT(entry))
839			if (entry->e_value_inum)
840				ea_inode_refs++;
841	}
842
843	if (EXT4_I(inode)->i_file_acl) {
844		bh = ext4_sb_bread(inode->i_sb, EXT4_I(inode)->i_file_acl, REQ_PRIO);
845		if (IS_ERR(bh)) {
846			ret = PTR_ERR(bh);
847			bh = NULL;
848			goto out;
849		}
850
851		ret = ext4_xattr_check_block(inode, bh);
852		if (ret)
853			goto out;
854
855		for (entry = BFIRST(bh); !IS_LAST_ENTRY(entry);
856		     entry = EXT4_XATTR_NEXT(entry))
857			if (entry->e_value_inum)
858				ea_inode_refs++;
859	}
860	*usage = ea_inode_refs + 1;
861	ret = 0;
862out:
863	brelse(iloc.bh);
864	brelse(bh);
865	return ret;
866}
867
868static inline size_t round_up_cluster(struct inode *inode, size_t length)
869{
870	struct super_block *sb = inode->i_sb;
871	size_t cluster_size = 1 << (EXT4_SB(sb)->s_cluster_bits +
872				    inode->i_blkbits);
873	size_t mask = ~(cluster_size - 1);
874
875	return (length + cluster_size - 1) & mask;
876}
877
878static int ext4_xattr_inode_alloc_quota(struct inode *inode, size_t len)
879{
880	int err;
881
882	err = dquot_alloc_inode(inode);
883	if (err)
884		return err;
885	err = dquot_alloc_space_nodirty(inode, round_up_cluster(inode, len));
886	if (err)
887		dquot_free_inode(inode);
888	return err;
889}
890
891static void ext4_xattr_inode_free_quota(struct inode *parent,
892					struct inode *ea_inode,
893					size_t len)
894{
895	if (ea_inode &&
896	    ext4_test_inode_state(ea_inode, EXT4_STATE_LUSTRE_EA_INODE))
897		return;
898	dquot_free_space_nodirty(parent, round_up_cluster(parent, len));
899	dquot_free_inode(parent);
900}
901
902int __ext4_xattr_set_credits(struct super_block *sb, struct inode *inode,
903			     struct buffer_head *block_bh, size_t value_len,
904			     bool is_create)
905{
906	int credits;
907	int blocks;
908
909	/*
910	 * 1) Owner inode update
911	 * 2) Ref count update on old xattr block
912	 * 3) new xattr block
913	 * 4) block bitmap update for new xattr block
914	 * 5) group descriptor for new xattr block
915	 * 6) block bitmap update for old xattr block
916	 * 7) group descriptor for old block
917	 *
918	 * 6 & 7 can happen if we have two racing threads T_a and T_b
919	 * which are each trying to set an xattr on inodes I_a and I_b
920	 * which were both initially sharing an xattr block.
921	 */
922	credits = 7;
923
924	/* Quota updates. */
925	credits += EXT4_MAXQUOTAS_TRANS_BLOCKS(sb);
926
927	/*
928	 * In case of inline data, we may push out the data to a block,
929	 * so we need to reserve credits for this eventuality
930	 */
931	if (inode && ext4_has_inline_data(inode))
932		credits += ext4_writepage_trans_blocks(inode) + 1;
933
934	/* We are done if ea_inode feature is not enabled. */
935	if (!ext4_has_feature_ea_inode(sb))
936		return credits;
937
938	/* New ea_inode, inode map, block bitmap, group descriptor. */
939	credits += 4;
940
941	/* Data blocks. */
942	blocks = (value_len + sb->s_blocksize - 1) >> sb->s_blocksize_bits;
943
944	/* Indirection block or one level of extent tree. */
945	blocks += 1;
946
947	/* Block bitmap and group descriptor updates for each block. */
948	credits += blocks * 2;
949
950	/* Blocks themselves. */
951	credits += blocks;
952
953	if (!is_create) {
954		/* Dereference ea_inode holding old xattr value.
955		 * Old ea_inode, inode map, block bitmap, group descriptor.
956		 */
957		credits += 4;
958
959		/* Data blocks for old ea_inode. */
960		blocks = XATTR_SIZE_MAX >> sb->s_blocksize_bits;
961
962		/* Indirection block or one level of extent tree for old
963		 * ea_inode.
964		 */
965		blocks += 1;
966
967		/* Block bitmap and group descriptor updates for each block. */
968		credits += blocks * 2;
969	}
970
971	/* We may need to clone the existing xattr block in which case we need
972	 * to increment ref counts for existing ea_inodes referenced by it.
973	 */
974	if (block_bh) {
975		struct ext4_xattr_entry *entry = BFIRST(block_bh);
976
977		for (; !IS_LAST_ENTRY(entry); entry = EXT4_XATTR_NEXT(entry))
978			if (entry->e_value_inum)
979				/* Ref count update on ea_inode. */
980				credits += 1;
981	}
982	return credits;
983}
984
985static int ext4_xattr_inode_update_ref(handle_t *handle, struct inode *ea_inode,
986				       int ref_change)
987{
988	struct ext4_iloc iloc;
989	s64 ref_count;
990	int ret;
991
992	inode_lock(ea_inode);
993
994	ret = ext4_reserve_inode_write(handle, ea_inode, &iloc);
995	if (ret)
996		goto out;
997
998	ref_count = ext4_xattr_inode_get_ref(ea_inode);
999	ref_count += ref_change;
1000	ext4_xattr_inode_set_ref(ea_inode, ref_count);
1001
1002	if (ref_change > 0) {
1003		WARN_ONCE(ref_count <= 0, "EA inode %lu ref_count=%lld",
1004			  ea_inode->i_ino, ref_count);
1005
1006		if (ref_count == 1) {
1007			WARN_ONCE(ea_inode->i_nlink, "EA inode %lu i_nlink=%u",
1008				  ea_inode->i_ino, ea_inode->i_nlink);
1009
1010			set_nlink(ea_inode, 1);
1011			ext4_orphan_del(handle, ea_inode);
1012		}
1013	} else {
1014		WARN_ONCE(ref_count < 0, "EA inode %lu ref_count=%lld",
1015			  ea_inode->i_ino, ref_count);
1016
1017		if (ref_count == 0) {
1018			WARN_ONCE(ea_inode->i_nlink != 1,
1019				  "EA inode %lu i_nlink=%u",
1020				  ea_inode->i_ino, ea_inode->i_nlink);
1021
1022			clear_nlink(ea_inode);
1023			ext4_orphan_add(handle, ea_inode);
1024		}
1025	}
1026
1027	ret = ext4_mark_iloc_dirty(handle, ea_inode, &iloc);
1028	if (ret)
1029		ext4_warning_inode(ea_inode,
1030				   "ext4_mark_iloc_dirty() failed ret=%d", ret);
1031out:
1032	inode_unlock(ea_inode);
1033	return ret;
1034}
1035
1036static int ext4_xattr_inode_inc_ref(handle_t *handle, struct inode *ea_inode)
1037{
1038	return ext4_xattr_inode_update_ref(handle, ea_inode, 1);
1039}
1040
1041static int ext4_xattr_inode_dec_ref(handle_t *handle, struct inode *ea_inode)
1042{
1043	return ext4_xattr_inode_update_ref(handle, ea_inode, -1);
1044}
1045
1046static int ext4_xattr_inode_inc_ref_all(handle_t *handle, struct inode *parent,
1047					struct ext4_xattr_entry *first)
1048{
1049	struct inode *ea_inode;
1050	struct ext4_xattr_entry *entry;
1051	struct ext4_xattr_entry *failed_entry;
1052	unsigned int ea_ino;
1053	int err, saved_err;
1054
1055	for (entry = first; !IS_LAST_ENTRY(entry);
1056	     entry = EXT4_XATTR_NEXT(entry)) {
1057		if (!entry->e_value_inum)
1058			continue;
1059		ea_ino = le32_to_cpu(entry->e_value_inum);
1060		err = ext4_xattr_inode_iget(parent, ea_ino,
1061					    le32_to_cpu(entry->e_hash),
1062					    &ea_inode);
1063		if (err)
1064			goto cleanup;
1065		err = ext4_xattr_inode_inc_ref(handle, ea_inode);
1066		if (err) {
1067			ext4_warning_inode(ea_inode, "inc ref error %d", err);
1068			iput(ea_inode);
1069			goto cleanup;
1070		}
1071		iput(ea_inode);
1072	}
1073	return 0;
1074
1075cleanup:
1076	saved_err = err;
1077	failed_entry = entry;
1078
1079	for (entry = first; entry != failed_entry;
1080	     entry = EXT4_XATTR_NEXT(entry)) {
1081		if (!entry->e_value_inum)
1082			continue;
1083		ea_ino = le32_to_cpu(entry->e_value_inum);
1084		err = ext4_xattr_inode_iget(parent, ea_ino,
1085					    le32_to_cpu(entry->e_hash),
1086					    &ea_inode);
1087		if (err) {
1088			ext4_warning(parent->i_sb,
1089				     "cleanup ea_ino %u iget error %d", ea_ino,
1090				     err);
1091			continue;
1092		}
1093		err = ext4_xattr_inode_dec_ref(handle, ea_inode);
1094		if (err)
1095			ext4_warning_inode(ea_inode, "cleanup dec ref error %d",
1096					   err);
1097		iput(ea_inode);
1098	}
1099	return saved_err;
1100}
1101
1102static int ext4_xattr_restart_fn(handle_t *handle, struct inode *inode,
1103			struct buffer_head *bh, bool block_csum, bool dirty)
1104{
1105	int error;
1106
1107	if (bh && dirty) {
1108		if (block_csum)
1109			ext4_xattr_block_csum_set(inode, bh);
1110		error = ext4_handle_dirty_metadata(handle, NULL, bh);
1111		if (error) {
1112			ext4_warning(inode->i_sb, "Handle metadata (error %d)",
1113				     error);
1114			return error;
1115		}
1116	}
1117	return 0;
1118}
1119
/*
 * Drop one reference on every EA inode referenced by the entry list that
 * starts at @first, clearing e_value_inum/e_value_size of each entry that
 * was processed successfully.
 *
 * @handle:         journal handle; more credits are requested per entry.
 * @parent:         inode owning the xattr entries.
 * @bh:             buffer holding the entries; dirtied when anything changed.
 * @first:          first entry of the list to walk.
 * @block_csum:     passed to ext4_xattr_restart_fn() to decide whether the
 *                  block checksum is recomputed before a handle restart.
 * @ea_inode_array: every EA inode looked up here is appended to this array
 *                  instead of being released with iput() right away
 *                  (presumably the caller releases the array later; confirm
 *                  against callers).
 * @extra_credits:  credits required on top of the two needed per EA inode.
 * @skip_quota:     when true, no quota is released for the EA inodes.
 *
 * Failures on individual entries are logged (except a failed inode lookup,
 * which is skipped silently) and do not stop the walk; nothing is returned.
 */
static void
ext4_xattr_inode_dec_ref_all(handle_t *handle, struct inode *parent,
			     struct buffer_head *bh,
			     struct ext4_xattr_entry *first, bool block_csum,
			     struct ext4_xattr_inode_array **ea_inode_array,
			     int extra_credits, bool skip_quota)
{
	struct inode *ea_inode;
	struct ext4_xattr_entry *entry;
	bool dirty = false;
	unsigned int ea_ino;
	int err;
	int credits;

	/* One credit for dec ref on ea_inode, one for orphan list addition. */
	credits = 2 + extra_credits;

	for (entry = first; !IS_LAST_ENTRY(entry);
	     entry = EXT4_XATTR_NEXT(entry)) {
		/* Only entries whose value lives in an EA inode matter here. */
		if (!entry->e_value_inum)
			continue;
		ea_ino = le32_to_cpu(entry->e_value_inum);
		err = ext4_xattr_inode_iget(parent, ea_ino,
					    le32_to_cpu(entry->e_hash),
					    &ea_inode);
		if (err)
			continue;

		/*
		 * Record the inode in the array. This is the only failure
		 * path that calls iput() here; on later failures the inode
		 * stays referenced from the array.
		 */
		err = ext4_expand_inode_array(ea_inode_array, ea_inode);
		if (err) {
			ext4_warning_inode(ea_inode,
					   "Expand inode array err=%d", err);
			iput(ea_inode);
			continue;
		}

		err = ext4_journal_ensure_credits_fn(handle, credits, credits,
			ext4_free_metadata_revoke_credits(parent->i_sb, 1),
			ext4_xattr_restart_fn(handle, parent, bh, block_csum,
					      dirty));
		if (err < 0) {
			ext4_warning_inode(ea_inode, "Ensure credits err=%d",
					   err);
			continue;
		}
		/*
		 * NOTE(review): a positive return presumably means the handle
		 * was restarted, which is why write access to @bh is obtained
		 * again -- confirm against ext4_journal_ensure_credits_fn().
		 */
		if (err > 0) {
			err = ext4_journal_get_write_access(handle, bh);
			if (err) {
				ext4_warning_inode(ea_inode,
						"Re-get write access err=%d",
						err);
				continue;
			}
		}

		err = ext4_xattr_inode_dec_ref(handle, ea_inode);
		if (err) {
			ext4_warning_inode(ea_inode, "ea_inode dec ref err=%d",
					   err);
			continue;
		}

		if (!skip_quota)
			ext4_xattr_inode_free_quota(parent, ea_inode,
					      le32_to_cpu(entry->e_value_size));

		/*
		 * Forget about ea_inode within the same transaction that
		 * decrements the ref count. This avoids duplicate decrements in
		 * case the rest of the work spills over to subsequent
		 * transactions.
		 */
		entry->e_value_inum = 0;
		entry->e_value_size = 0;

		dirty = true;
	}

	if (dirty) {
		/*
		 * Note that we are deliberately skipping csum calculation for
		 * the final update because we do not expect any journal
		 * restarts until xattr block is freed.
		 */

		err = ext4_handle_dirty_metadata(handle, NULL, bh);
		if (err)
			ext4_warning_inode(parent,
					   "handle dirty metadata err=%d", err);
	}
}
1211
/*
 * Release the xattr block BH: If the reference count is > 1, decrement it;
 * otherwise free the block.
 *
 * @handle:         journal handle used for all modifications.
 * @inode:          inode giving up its reference on the block.
 * @bh:             buffer head of the xattr block.
 * @ea_inode_array: forwarded to ext4_xattr_inode_dec_ref_all() when the
 *                  block is freed on a filesystem with the ea_inode feature.
 * @extra_credits:  forwarded to ext4_xattr_inode_dec_ref_all().
 *
 * Nothing is returned; any error is reported through ext4_std_error().
 */
static void
ext4_xattr_release_block(handle_t *handle, struct inode *inode,
			 struct buffer_head *bh,
			 struct ext4_xattr_inode_array **ea_inode_array,
			 int extra_credits)
{
	struct mb_cache *ea_block_cache = EA_BLOCK_CACHE(inode);
	u32 hash, ref;
	int error = 0;

	BUFFER_TRACE(bh, "get_write_access");
	error = ext4_journal_get_write_access(handle, bh);
	if (error)
		goto out;

retry_ref:
	/* The header is re-read under the buffer lock on every retry. */
	lock_buffer(bh);
	hash = le32_to_cpu(BHDR(bh)->h_hash);
	ref = le32_to_cpu(BHDR(bh)->h_refcount);
	if (ref == 1) {
		ea_bdebug(bh, "refcount now=0; freeing");
		/*
		 * This must happen under buffer lock for
		 * ext4_xattr_block_set() to reliably detect freed block
		 */
		if (ea_block_cache) {
			struct mb_cache_entry *oe;

			oe = mb_cache_entry_delete_or_get(ea_block_cache, hash,
							  bh->b_blocknr);
			if (oe) {
				/*
				 * The cache entry could not be deleted and was
				 * returned instead: drop the buffer lock, wait
				 * until the entry is unused and start over.
				 */
				unlock_buffer(bh);
				mb_cache_entry_wait_unused(oe);
				mb_cache_entry_put(ea_block_cache, oe);
				goto retry_ref;
			}
		}
		/*
		 * NOTE(review): extra buffer reference taken before freeing;
		 * presumably consumed by ext4_free_blocks() with
		 * EXT4_FREE_BLOCKS_FORGET -- confirm.
		 */
		get_bh(bh);
		unlock_buffer(bh);

		if (ext4_has_feature_ea_inode(inode->i_sb))
			ext4_xattr_inode_dec_ref_all(handle, inode, bh,
						     BFIRST(bh),
						     true /* block_csum */,
						     ea_inode_array,
						     extra_credits,
						     true /* skip_quota */);
		ext4_free_blocks(handle, inode, bh, 0, 1,
				 EXT4_FREE_BLOCKS_METADATA |
				 EXT4_FREE_BLOCKS_FORGET);
	} else {
		ref--;
		BHDR(bh)->h_refcount = cpu_to_le32(ref);
		/*
		 * The count just dropped below EXT4_XATTR_REFCOUNT_MAX, so
		 * the cache entry for this block is flagged reusable again.
		 */
		if (ref == EXT4_XATTR_REFCOUNT_MAX - 1) {
			struct mb_cache_entry *ce;

			if (ea_block_cache) {
				ce = mb_cache_entry_get(ea_block_cache, hash,
							bh->b_blocknr);
				if (ce) {
					set_bit(MBE_REUSABLE_B, &ce->e_flags);
					mb_cache_entry_put(ea_block_cache, ce);
				}
			}
		}

		ext4_xattr_block_csum_set(inode, bh);
		/*
		 * Beware of this ugliness: Releasing of xattr block references
		 * from different inodes can race and so we have to protect
		 * from a race where someone else frees the block (and releases
		 * its journal_head) before we are done dirtying the buffer. In
		 * nojournal mode this race is harmless and we actually cannot
		 * call ext4_handle_dirty_metadata() with locked buffer as
		 * that function can call sync_dirty_buffer() so for that case
		 * we handle the dirtying after unlocking the buffer.
		 */
		if (ext4_handle_valid(handle))
			error = ext4_handle_dirty_metadata(handle, inode, bh);
		unlock_buffer(bh);
		if (!ext4_handle_valid(handle))
			error = ext4_handle_dirty_metadata(handle, inode, bh);
		if (IS_SYNC(inode))
			ext4_handle_sync(handle);
		/* This inode no longer uses the block: release its quota. */
		dquot_free_block(inode, EXT4_C2B(EXT4_SB(inode->i_sb), 1));
		ea_bdebug(bh, "refcount now=%d; releasing",
			  le32_to_cpu(BHDR(bh)->h_refcount));
	}
out:
	ext4_std_error(inode->i_sb, error);
	return;
}
1308
1309/*
1310 * Find the available free space for EAs. This also returns the total number of
1311 * bytes used by EA entries.
1312 */
1313static size_t ext4_xattr_free_space(struct ext4_xattr_entry *last,
1314				    size_t *min_offs, void *base, int *total)
1315{
1316	for (; !IS_LAST_ENTRY(last); last = EXT4_XATTR_NEXT(last)) {
1317		if (!last->e_value_inum && last->e_value_size) {
1318			size_t offs = le16_to_cpu(last->e_value_offs);
1319			if (offs < *min_offs)
1320				*min_offs = offs;
1321		}
1322		if (total)
1323			*total += EXT4_XATTR_LEN(last->e_name_len);
1324	}
1325	return (*min_offs - ((void *)last - base) - sizeof(__u32));
1326}
1327
1328/*
1329 * Write the value of the EA in an inode.
1330 */
1331static int ext4_xattr_inode_write(handle_t *handle, struct inode *ea_inode,
1332				  const void *buf, int bufsize)
1333{
1334	struct buffer_head *bh = NULL;
1335	unsigned long block = 0;
1336	int blocksize = ea_inode->i_sb->s_blocksize;
1337	int max_blocks = (bufsize + blocksize - 1) >> ea_inode->i_blkbits;
1338	int csize, wsize = 0;
1339	int ret = 0, ret2 = 0;
1340	int retries = 0;
1341
1342retry:
1343	while (ret >= 0 && ret < max_blocks) {
1344		struct ext4_map_blocks map;
1345		map.m_lblk = block += ret;
1346		map.m_len = max_blocks -= ret;
1347
1348		ret = ext4_map_blocks(handle, ea_inode, &map,
1349				      EXT4_GET_BLOCKS_CREATE);
1350		if (ret <= 0) {
1351			ext4_mark_inode_dirty(handle, ea_inode);
1352			if (ret == -ENOSPC &&
1353			    ext4_should_retry_alloc(ea_inode->i_sb, &retries)) {
1354				ret = 0;
1355				goto retry;
1356			}
1357			break;
1358		}
1359	}
1360
1361	if (ret < 0)
1362		return ret;
1363
1364	block = 0;
1365	while (wsize < bufsize) {
1366		brelse(bh);
1367		csize = (bufsize - wsize) > blocksize ? blocksize :
1368								bufsize - wsize;
1369		bh = ext4_getblk(handle, ea_inode, block, 0);
1370		if (IS_ERR(bh))
1371			return PTR_ERR(bh);
1372		if (!bh) {
1373			WARN_ON_ONCE(1);
1374			EXT4_ERROR_INODE(ea_inode,
1375					 "ext4_getblk() return bh = NULL");
1376			return -EFSCORRUPTED;
1377		}
1378		ret = ext4_journal_get_write_access(handle, bh);
1379		if (ret)
1380			goto out;
1381
1382		memcpy(bh->b_data, buf, csize);
1383		set_buffer_uptodate(bh);
1384		ext4_handle_dirty_metadata(handle, ea_inode, bh);
1385
1386		buf += csize;
1387		wsize += csize;
1388		block += 1;
1389	}
1390
1391	inode_lock(ea_inode);
1392	i_size_write(ea_inode, wsize);
1393	ext4_update_i_disksize(ea_inode, wsize);
1394	inode_unlock(ea_inode);
1395
1396	ret2 = ext4_mark_inode_dirty(handle, ea_inode);
1397	if (unlikely(ret2 && !ret))
1398		ret = ret2;
1399
1400out:
1401	brelse(bh);
1402
1403	return ret;
1404}
1405
1406/*
1407 * Create an inode to store the value of a large EA.
1408 */
1409static struct inode *ext4_xattr_inode_create(handle_t *handle,
1410					     struct inode *inode, u32 hash)
1411{
1412	struct inode *ea_inode = NULL;
1413	uid_t owner[2] = { i_uid_read(inode), i_gid_read(inode) };
1414	int err;
1415
1416	if (inode->i_sb->s_root == NULL) {
1417		ext4_warning(inode->i_sb,
1418			     "refuse to create EA inode when umounting");
1419		WARN_ON(1);
1420		return ERR_PTR(-EINVAL);
1421	}
1422
1423	/*
1424	 * Let the next inode be the goal, so we try and allocate the EA inode
1425	 * in the same group, or nearby one.
1426	 */
1427	ea_inode = ext4_new_inode(handle, inode->i_sb->s_root->d_inode,
1428				  S_IFREG | 0600, NULL, inode->i_ino + 1, owner,
1429				  EXT4_EA_INODE_FL);
1430	if (!IS_ERR(ea_inode)) {
1431		ea_inode->i_op = &ext4_file_inode_operations;
1432		ea_inode->i_fop = &ext4_file_operations;
1433		ext4_set_aops(ea_inode);
1434		ext4_xattr_inode_set_class(ea_inode);
1435		unlock_new_inode(ea_inode);
1436		ext4_xattr_inode_set_ref(ea_inode, 1);
1437		ext4_xattr_inode_set_hash(ea_inode, hash);
1438		err = ext4_mark_inode_dirty(handle, ea_inode);
1439		if (!err)
1440			err = ext4_inode_attach_jinode(ea_inode);
1441		if (err) {
1442			if (ext4_xattr_inode_dec_ref(handle, ea_inode))
1443				ext4_warning_inode(ea_inode,
1444					"cleanup dec ref error %d", err);
1445			iput(ea_inode);
1446			return ERR_PTR(err);
1447		}
1448
1449		/*
1450		 * Xattr inodes are shared therefore quota charging is performed
1451		 * at a higher level.
1452		 */
1453		dquot_free_inode(ea_inode);
1454		dquot_drop(ea_inode);
1455		inode_lock(ea_inode);
1456		ea_inode->i_flags |= S_NOQUOTA;
1457		inode_unlock(ea_inode);
1458	}
1459
1460	return ea_inode;
1461}
1462
1463static struct inode *
1464ext4_xattr_inode_cache_find(struct inode *inode, const void *value,
1465			    size_t value_len, u32 hash)
1466{
1467	struct inode *ea_inode;
1468	struct mb_cache_entry *ce;
1469	struct mb_cache *ea_inode_cache = EA_INODE_CACHE(inode);
1470	void *ea_data;
1471
1472	if (!ea_inode_cache)
1473		return NULL;
1474
1475	ce = mb_cache_entry_find_first(ea_inode_cache, hash);
1476	if (!ce)
1477		return NULL;
1478
1479	WARN_ON_ONCE(ext4_handle_valid(journal_current_handle()) &&
1480		     !(current->flags & PF_MEMALLOC_NOFS));
1481
1482	ea_data = kvmalloc(value_len, GFP_KERNEL);
1483	if (!ea_data) {
1484		mb_cache_entry_put(ea_inode_cache, ce);
1485		return NULL;
1486	}
1487
1488	while (ce) {
1489		ea_inode = ext4_iget(inode->i_sb, ce->e_value,
1490				     EXT4_IGET_EA_INODE);
1491		if (IS_ERR(ea_inode))
1492			goto next_entry;
1493		ext4_xattr_inode_set_class(ea_inode);
1494		if (i_size_read(ea_inode) == value_len &&
1495		    !ext4_xattr_inode_read(ea_inode, ea_data, value_len) &&
1496		    !ext4_xattr_inode_verify_hashes(ea_inode, NULL, ea_data,
1497						    value_len) &&
1498		    !memcmp(value, ea_data, value_len)) {
1499			mb_cache_entry_touch(ea_inode_cache, ce);
1500			mb_cache_entry_put(ea_inode_cache, ce);
1501			kvfree(ea_data);
1502			return ea_inode;
1503		}
1504		iput(ea_inode);
1505	next_entry:
1506		ce = mb_cache_entry_find_next(ea_inode_cache, ce);
1507	}
1508	kvfree(ea_data);
1509	return NULL;
1510}
1511
1512/*
1513 * Add value of the EA in an inode.
1514 */
1515static int ext4_xattr_inode_lookup_create(handle_t *handle, struct inode *inode,
1516					  const void *value, size_t value_len,
1517					  struct inode **ret_inode)
1518{
1519	struct inode *ea_inode;
1520	u32 hash;
1521	int err;
1522
1523	hash = ext4_xattr_inode_hash(EXT4_SB(inode->i_sb), value, value_len);
1524	ea_inode = ext4_xattr_inode_cache_find(inode, value, value_len, hash);
1525	if (ea_inode) {
1526		err = ext4_xattr_inode_inc_ref(handle, ea_inode);
1527		if (err) {
1528			iput(ea_inode);
1529			return err;
1530		}
1531
1532		*ret_inode = ea_inode;
1533		return 0;
1534	}
1535
1536	/* Create an inode for the EA value */
1537	ea_inode = ext4_xattr_inode_create(handle, inode, hash);
1538	if (IS_ERR(ea_inode))
1539		return PTR_ERR(ea_inode);
1540
1541	err = ext4_xattr_inode_write(handle, ea_inode, value, value_len);
1542	if (err) {
1543		ext4_xattr_inode_dec_ref(handle, ea_inode);
1544		iput(ea_inode);
1545		return err;
1546	}
1547
1548	if (EA_INODE_CACHE(inode))
1549		mb_cache_entry_create(EA_INODE_CACHE(inode), GFP_NOFS, hash,
1550				      ea_inode->i_ino, true /* reusable */);
1551
1552	*ret_inode = ea_inode;
1553	return 0;
1554}
1555
/*
 * Reserve min(block_size/8, 1024) bytes for xattr entries/names if ea_inode
 * feature is enabled.
 *
 * ext4_xattr_set_entry() uses this as the lowest offset at which in-block
 * values may start: a new in-block value that would move the start of the
 * value area below this offset is rejected with -ENOSPC.
 */
#define EXT4_XATTR_BLOCK_RESERVE(inode)	min(i_blocksize(inode)/8, 1024U)
1561
/*
 * Add, replace or remove the attribute described by @i in the xattr region
 * described by @s.
 *
 * @i:        attribute to set; i->value == NULL requests removal and
 *            i->in_inode requests that the value be stored in an EA inode.
 * @s:        search state: base/first/end delimit the region, here is the
 *            entry to update (or the insertion point when s->not_found).
 * @handle:   journal handle passed to the EA inode ref-count helpers.
 * @inode:    inode owning the xattr region.
 * @is_block: true when the region is an xattr block rather than the inode
 *            body; enables the block reserve check, hashing of in-block
 *            values and ext4_xattr_rehash().
 *
 * Returns 0 on success, -ENOSPC when the new value does not fit,
 * -EFSCORRUPTED when the entry list runs past s->end, or an error from the
 * EA inode/quota helpers or ext4_find_inline_data_nolock().
 */
static int ext4_xattr_set_entry(struct ext4_xattr_info *i,
				struct ext4_xattr_search *s,
				handle_t *handle, struct inode *inode,
				bool is_block)
{
	struct ext4_xattr_entry *last, *next;
	struct ext4_xattr_entry *here = s->here;
	size_t min_offs = s->end - s->base, name_len = strlen(i->name);
	int in_inode = i->in_inode;
	struct inode *old_ea_inode = NULL;
	struct inode *new_ea_inode = NULL;
	size_t old_size, new_size;
	int ret;

	/* Space used by old and new values. */
	old_size = (!s->not_found && !here->e_value_inum) ?
			EXT4_XATTR_SIZE(le32_to_cpu(here->e_value_size)) : 0;
	new_size = (i->value && !in_inode) ? EXT4_XATTR_SIZE(i->value_len) : 0;

	/*
	 * Optimization for the simple case when old and new values have the
	 * same padded sizes. Not applicable if external inodes are involved.
	 */
	if (new_size && new_size == old_size) {
		size_t offs = le16_to_cpu(here->e_value_offs);
		void *val = s->base + offs;

		/* Overwrite the value in place; no entry needs to move. */
		here->e_value_size = cpu_to_le32(i->value_len);
		if (i->value == EXT4_ZERO_XATTR_VALUE) {
			memset(val, 0, new_size);
		} else {
			memcpy(val, i->value, i->value_len);
			/* Clear padding bytes. */
			memset(val + i->value_len, 0, new_size - i->value_len);
		}
		goto update_hash;
	}

	/* Compute min_offs and last. */
	last = s->first;
	for (; !IS_LAST_ENTRY(last); last = next) {
		next = EXT4_XATTR_NEXT(last);
		if ((void *)next >= s->end) {
			EXT4_ERROR_INODE(inode, "corrupted xattr entries");
			ret = -EFSCORRUPTED;
			goto out;
		}
		if (!last->e_value_inum && last->e_value_size) {
			size_t offs = le16_to_cpu(last->e_value_offs);
			if (offs < min_offs)
				min_offs = offs;
		}
	}

	/* Check whether we have enough space. */
	if (i->value) {
		size_t free;

		/*
		 * Gap between the entry list (plus its 4-byte terminator) and
		 * the lowest value; a replaced entry gives its own space back.
		 */
		free = min_offs - ((void *)last - s->base) - sizeof(__u32);
		if (!s->not_found)
			free += EXT4_XATTR_LEN(name_len) + old_size;

		if (free < EXT4_XATTR_LEN(name_len) + new_size) {
			ret = -ENOSPC;
			goto out;
		}

		/*
		 * If storing the value in an external inode is an option,
		 * reserve space for xattr entries/names in the external
		 * attribute block so that a long value does not occupy the
		 * whole space and prevent further entries being added.
		 */
		if (ext4_has_feature_ea_inode(inode->i_sb) &&
		    new_size && is_block &&
		    (min_offs + old_size - new_size) <
					EXT4_XATTR_BLOCK_RESERVE(inode)) {
			ret = -ENOSPC;
			goto out;
		}
	}

	/*
	 * Getting access to old and new ea inodes is subject to failures.
	 * Finish that work before doing any modifications to the xattr data.
	 */
	if (!s->not_found && here->e_value_inum) {
		ret = ext4_xattr_inode_iget(inode,
					    le32_to_cpu(here->e_value_inum),
					    le32_to_cpu(here->e_hash),
					    &old_ea_inode);
		if (ret) {
			old_ea_inode = NULL;
			goto out;
		}
	}
	if (i->value && in_inode) {
		WARN_ON_ONCE(!i->value_len);

		/* Charge quota first; it is given back if the lookup fails. */
		ret = ext4_xattr_inode_alloc_quota(inode, i->value_len);
		if (ret)
			goto out;

		ret = ext4_xattr_inode_lookup_create(handle, inode, i->value,
						     i->value_len,
						     &new_ea_inode);
		if (ret) {
			new_ea_inode = NULL;
			ext4_xattr_inode_free_quota(inode, NULL, i->value_len);
			goto out;
		}
	}

	if (old_ea_inode) {
		/* We are ready to release ref count on the old_ea_inode. */
		ret = ext4_xattr_inode_dec_ref(handle, old_ea_inode);
		if (ret) {
			/* Release newly required ref count on new_ea_inode. */
			if (new_ea_inode) {
				int err;

				err = ext4_xattr_inode_dec_ref(handle,
							       new_ea_inode);
				if (err)
					ext4_warning_inode(new_ea_inode,
						  "dec ref new_ea_inode err=%d",
						  err);
				ext4_xattr_inode_free_quota(inode, new_ea_inode,
							    i->value_len);
			}
			goto out;
		}

		ext4_xattr_inode_free_quota(inode, old_ea_inode,
					    le32_to_cpu(here->e_value_size));
	}

	/* No failures allowed past this point. */

	if (!s->not_found && here->e_value_size && !here->e_value_inum) {
		/* Remove the old value. */
		void *first_val = s->base + min_offs;
		size_t offs = le16_to_cpu(here->e_value_offs);
		void *val = s->base + offs;

		/*
		 * Shift every value stored below the old one up by old_size,
		 * closing the hole, and clear the bytes that became free.
		 */
		memmove(first_val + old_size, first_val, val - first_val);
		memset(first_val, 0, old_size);
		min_offs += old_size;

		/* Adjust all value offsets. */
		last = s->first;
		while (!IS_LAST_ENTRY(last)) {
			size_t o = le16_to_cpu(last->e_value_offs);

			if (!last->e_value_inum &&
			    last->e_value_size && o < offs)
				last->e_value_offs = cpu_to_le16(o + old_size);
			last = EXT4_XATTR_NEXT(last);
		}
	}

	if (!i->value) {
		/* Remove old name. */
		size_t size = EXT4_XATTR_LEN(name_len);

		/*
		 * Pull the following entries (and the terminator) down over
		 * the removed entry, then clear the vacated tail.
		 */
		last = ENTRY((void *)last - size);
		memmove(here, (void *)here + size,
			(void *)last - (void *)here + sizeof(__u32));
		memset(last, 0, size);

		/*
		 * Update i_inline_off - moved ibody region might contain
		 * system.data attribute.  Handling a failure here won't
		 * cause other complications for setting an xattr.
		 */
		if (!is_block && ext4_has_inline_data(inode)) {
			ret = ext4_find_inline_data_nolock(inode);
			if (ret) {
				ext4_warning_inode(inode,
					"unable to update i_inline_off");
				goto out;
			}
		}
	} else if (s->not_found) {
		/* Insert new name. */
		size_t size = EXT4_XATTR_LEN(name_len);
		size_t rest = (void *)last - (void *)here + sizeof(__u32);

		/* Make room at the insertion point for the new descriptor. */
		memmove((void *)here + size, here, rest);
		memset(here, 0, size);
		here->e_name_index = i->name_index;
		here->e_name_len = name_len;
		memcpy(here->e_name, i->name, name_len);
	} else {
		/* This is an update, reset value info. */
		here->e_value_inum = 0;
		here->e_value_offs = 0;
		here->e_value_size = 0;
	}

	if (i->value) {
		/* Insert new value. */
		if (in_inode) {
			here->e_value_inum = cpu_to_le32(new_ea_inode->i_ino);
		} else if (i->value_len) {
			/* New value goes directly below the lowest value. */
			void *val = s->base + min_offs - new_size;

			here->e_value_offs = cpu_to_le16(min_offs - new_size);
			if (i->value == EXT4_ZERO_XATTR_VALUE) {
				memset(val, 0, new_size);
			} else {
				memcpy(val, i->value, i->value_len);
				/* Clear padding bytes. */
				memset(val + i->value_len, 0,
				       new_size - i->value_len);
			}
		}
		here->e_value_size = cpu_to_le32(i->value_len);
	}

update_hash:
	if (i->value) {
		__le32 hash = 0;

		/* Entry hash calculation. */
		if (in_inode) {
			__le32 crc32c_hash;

			/*
			 * Feed crc32c hash instead of the raw value for entry
			 * hash calculation. This is to avoid walking
			 * potentially long value buffer again.
			 */
			crc32c_hash = cpu_to_le32(
				       ext4_xattr_inode_get_hash(new_ea_inode));
			hash = ext4_xattr_hash_entry(here->e_name,
						     here->e_name_len,
						     &crc32c_hash, 1);
		} else if (is_block) {
			__le32 *value = s->base + le16_to_cpu(
							here->e_value_offs);

			hash = ext4_xattr_hash_entry(here->e_name,
						     here->e_name_len, value,
						     new_size >> 2);
		}
		/* In-body entries with in-region values keep a zero hash. */
		here->e_hash = hash;
	}

	if (is_block)
		ext4_xattr_rehash((struct ext4_xattr_header *)s->base);

	ret = 0;
out:
	/* Both pointers may still be NULL here; iput() is called regardless. */
	iput(old_ea_inode);
	iput(new_ea_inode);
	return ret;
}
1820
/*
 * Result of looking up an attribute in an inode's external xattr block:
 * the search state filled in by ext4_xattr_block_find() plus the buffer
 * head of the block that was read (set to NULL if reading it failed).
 */
struct ext4_xattr_block_find {
	struct ext4_xattr_search s;
	struct buffer_head *bh;
};
1825
1826static int
1827ext4_xattr_block_find(struct inode *inode, struct ext4_xattr_info *i,
1828		      struct ext4_xattr_block_find *bs)
1829{
1830	struct super_block *sb = inode->i_sb;
1831	int error;
1832
1833	ea_idebug(inode, "name=%d.%s, value=%p, value_len=%ld",
1834		  i->name_index, i->name, i->value, (long)i->value_len);
1835
1836	if (EXT4_I(inode)->i_file_acl) {
1837		/* The inode already has an extended attribute block. */
1838		bs->bh = ext4_sb_bread(sb, EXT4_I(inode)->i_file_acl, REQ_PRIO);
1839		if (IS_ERR(bs->bh)) {
1840			error = PTR_ERR(bs->bh);
1841			bs->bh = NULL;
1842			return error;
1843		}
1844		ea_bdebug(bs->bh, "b_count=%d, refcount=%d",
1845			atomic_read(&(bs->bh->b_count)),
1846			le32_to_cpu(BHDR(bs->bh)->h_refcount));
1847		error = ext4_xattr_check_block(inode, bs->bh);
1848		if (error)
1849			return error;
1850		/* Find the named attribute. */
1851		bs->s.base = BHDR(bs->bh);
1852		bs->s.first = BFIRST(bs->bh);
1853		bs->s.end = bs->bh->b_data + bs->bh->b_size;
1854		bs->s.here = bs->s.first;
1855		error = xattr_find_entry(inode, &bs->s.here, bs->s.end,
1856					 i->name_index, i->name, 1);
1857		if (error && error != -ENODATA)
1858			return error;
1859		bs->s.not_found = error;
1860	}
1861	return 0;
1862}
1863
1864static int
1865ext4_xattr_block_set(handle_t *handle, struct inode *inode,
1866		     struct ext4_xattr_info *i,
1867		     struct ext4_xattr_block_find *bs)
1868{
1869	struct super_block *sb = inode->i_sb;
1870	struct buffer_head *new_bh = NULL;
1871	struct ext4_xattr_search s_copy = bs->s;
1872	struct ext4_xattr_search *s = &s_copy;
1873	struct mb_cache_entry *ce = NULL;
1874	int error = 0;
1875	struct mb_cache *ea_block_cache = EA_BLOCK_CACHE(inode);
1876	struct inode *ea_inode = NULL, *tmp_inode;
1877	size_t old_ea_inode_quota = 0;
1878	unsigned int ea_ino;
1879
1880
1881#define header(x) ((struct ext4_xattr_header *)(x))
1882
1883	if (s->base) {
1884		int offset = (char *)s->here - bs->bh->b_data;
1885
1886		BUFFER_TRACE(bs->bh, "get_write_access");
1887		error = ext4_journal_get_write_access(handle, bs->bh);
1888		if (error)
1889			goto cleanup;
1890		lock_buffer(bs->bh);
1891
1892		if (header(s->base)->h_refcount == cpu_to_le32(1)) {
1893			__u32 hash = le32_to_cpu(BHDR(bs->bh)->h_hash);
1894
1895			/*
1896			 * This must happen under buffer lock for
1897			 * ext4_xattr_block_set() to reliably detect modified
1898			 * block
1899			 */
1900			if (ea_block_cache) {
1901				struct mb_cache_entry *oe;
1902
1903				oe = mb_cache_entry_delete_or_get(ea_block_cache,
1904					hash, bs->bh->b_blocknr);
1905				if (oe) {
1906					/*
1907					 * Xattr block is getting reused. Leave
1908					 * it alone.
1909					 */
1910					mb_cache_entry_put(ea_block_cache, oe);
1911					goto clone_block;
1912				}
1913			}
1914			ea_bdebug(bs->bh, "modifying in-place");
1915			error = ext4_xattr_set_entry(i, s, handle, inode,
1916						     true /* is_block */);
1917			ext4_xattr_block_csum_set(inode, bs->bh);
1918			unlock_buffer(bs->bh);
1919			if (error == -EFSCORRUPTED)
1920				goto bad_block;
1921			if (!error)
1922				error = ext4_handle_dirty_metadata(handle,
1923								   inode,
1924								   bs->bh);
1925			if (error)
1926				goto cleanup;
1927			goto inserted;
1928		}
1929clone_block:
1930		unlock_buffer(bs->bh);
1931		ea_bdebug(bs->bh, "cloning");
1932		s->base = kmemdup(BHDR(bs->bh), bs->bh->b_size, GFP_NOFS);
1933		error = -ENOMEM;
1934		if (s->base == NULL)
1935			goto cleanup;
1936		s->first = ENTRY(header(s->base)+1);
1937		header(s->base)->h_refcount = cpu_to_le32(1);
1938		s->here = ENTRY(s->base + offset);
1939		s->end = s->base + bs->bh->b_size;
1940
1941		/*
1942		 * If existing entry points to an xattr inode, we need
1943		 * to prevent ext4_xattr_set_entry() from decrementing
1944		 * ref count on it because the reference belongs to the
1945		 * original block. In this case, make the entry look
1946		 * like it has an empty value.
1947		 */
1948		if (!s->not_found && s->here->e_value_inum) {
1949			ea_ino = le32_to_cpu(s->here->e_value_inum);
1950			error = ext4_xattr_inode_iget(inode, ea_ino,
1951				      le32_to_cpu(s->here->e_hash),
1952				      &tmp_inode);
1953			if (error)
1954				goto cleanup;
1955
1956			if (!ext4_test_inode_state(tmp_inode,
1957					EXT4_STATE_LUSTRE_EA_INODE)) {
1958				/*
1959				 * Defer quota free call for previous
1960				 * inode until success is guaranteed.
1961				 */
1962				old_ea_inode_quota = le32_to_cpu(
1963						s->here->e_value_size);
1964			}
1965			iput(tmp_inode);
1966
1967			s->here->e_value_inum = 0;
1968			s->here->e_value_size = 0;
1969		}
1970	} else {
1971		/* Allocate a buffer where we construct the new block. */
1972		s->base = kzalloc(sb->s_blocksize, GFP_NOFS);
1973		/* assert(header == s->base) */
1974		error = -ENOMEM;
1975		if (s->base == NULL)
1976			goto cleanup;
1977		header(s->base)->h_magic = cpu_to_le32(EXT4_XATTR_MAGIC);
1978		header(s->base)->h_blocks = cpu_to_le32(1);
1979		header(s->base)->h_refcount = cpu_to_le32(1);
1980		s->first = ENTRY(header(s->base)+1);
1981		s->here = ENTRY(header(s->base)+1);
1982		s->end = s->base + sb->s_blocksize;
1983	}
1984
1985	error = ext4_xattr_set_entry(i, s, handle, inode, true /* is_block */);
1986	if (error == -EFSCORRUPTED)
1987		goto bad_block;
1988	if (error)
1989		goto cleanup;
1990
1991	if (i->value && s->here->e_value_inum) {
1992		/*
1993		 * A ref count on ea_inode has been taken as part of the call to
1994		 * ext4_xattr_set_entry() above. We would like to drop this
1995		 * extra ref but we have to wait until the xattr block is
1996		 * initialized and has its own ref count on the ea_inode.
1997		 */
1998		ea_ino = le32_to_cpu(s->here->e_value_inum);
1999		error = ext4_xattr_inode_iget(inode, ea_ino,
2000					      le32_to_cpu(s->here->e_hash),
2001					      &ea_inode);
2002		if (error) {
2003			ea_inode = NULL;
2004			goto cleanup;
2005		}
2006	}
2007
2008inserted:
2009	if (!IS_LAST_ENTRY(s->first)) {
2010		new_bh = ext4_xattr_block_cache_find(inode, header(s->base),
2011						     &ce);
2012		if (new_bh) {
2013			/* We found an identical block in the cache. */
2014			if (new_bh == bs->bh)
2015				ea_bdebug(new_bh, "keeping");
2016			else {
2017				u32 ref;
2018
2019#ifdef EXT4_XATTR_DEBUG
2020				WARN_ON_ONCE(dquot_initialize_needed(inode));
2021#endif
2022				/* The old block is released after updating
2023				   the inode. */
2024				error = dquot_alloc_block(inode,
2025						EXT4_C2B(EXT4_SB(sb), 1));
2026				if (error)
2027					goto cleanup;
2028				BUFFER_TRACE(new_bh, "get_write_access");
2029				error = ext4_journal_get_write_access(handle,
2030								      new_bh);
2031				if (error)
2032					goto cleanup_dquot;
2033				lock_buffer(new_bh);
2034				/*
2035				 * We have to be careful about races with
2036				 * adding references to xattr block. Once we
2037				 * hold buffer lock xattr block's state is
2038				 * stable so we can check the additional
2039				 * reference fits.
2040				 */
2041				ref = le32_to_cpu(BHDR(new_bh)->h_refcount) + 1;
2042				if (ref > EXT4_XATTR_REFCOUNT_MAX) {
2043					/*
2044					 * Undo everything and check mbcache
2045					 * again.
2046					 */
2047					unlock_buffer(new_bh);
2048					dquot_free_block(inode,
2049							 EXT4_C2B(EXT4_SB(sb),
2050								  1));
2051					brelse(new_bh);
2052					mb_cache_entry_put(ea_block_cache, ce);
2053					ce = NULL;
2054					new_bh = NULL;
2055					goto inserted;
2056				}
2057				BHDR(new_bh)->h_refcount = cpu_to_le32(ref);
2058				if (ref == EXT4_XATTR_REFCOUNT_MAX)
2059					clear_bit(MBE_REUSABLE_B, &ce->e_flags);
2060				ea_bdebug(new_bh, "reusing; refcount now=%d",
2061					  ref);
2062				ext4_xattr_block_csum_set(inode, new_bh);
2063				unlock_buffer(new_bh);
2064				error = ext4_handle_dirty_metadata(handle,
2065								   inode,
2066								   new_bh);
2067				if (error)
2068					goto cleanup_dquot;
2069			}
2070			mb_cache_entry_touch(ea_block_cache, ce);
2071			mb_cache_entry_put(ea_block_cache, ce);
2072			ce = NULL;
2073		} else if (bs->bh && s->base == bs->bh->b_data) {
2074			/* We were modifying this block in-place. */
2075			ea_bdebug(bs->bh, "keeping this block");
2076			ext4_xattr_block_cache_insert(ea_block_cache, bs->bh);
2077			new_bh = bs->bh;
2078			get_bh(new_bh);
2079		} else {
2080			/* We need to allocate a new block */
2081			ext4_fsblk_t goal, block;
2082
2083#ifdef EXT4_XATTR_DEBUG
2084			WARN_ON_ONCE(dquot_initialize_needed(inode));
2085#endif
2086			goal = ext4_group_first_block_no(sb,
2087						EXT4_I(inode)->i_block_group);
2088			block = ext4_new_meta_blocks(handle, inode, goal, 0,
2089						     NULL, &error);
2090			if (error)
2091				goto cleanup;
2092
2093			ea_idebug(inode, "creating block %llu",
2094				  (unsigned long long)block);
2095
2096			new_bh = sb_getblk(sb, block);
2097			if (unlikely(!new_bh)) {
2098				error = -ENOMEM;
2099getblk_failed:
2100				ext4_free_blocks(handle, inode, NULL, block, 1,
2101						 EXT4_FREE_BLOCKS_METADATA);
2102				goto cleanup;
2103			}
2104			error = ext4_xattr_inode_inc_ref_all(handle, inode,
2105						      ENTRY(header(s->base)+1));
2106			if (error)
2107				goto getblk_failed;
2108			if (ea_inode) {
2109				/* Drop the extra ref on ea_inode. */
2110				error = ext4_xattr_inode_dec_ref(handle,
2111								 ea_inode);
2112				if (error)
2113					ext4_warning_inode(ea_inode,
2114							   "dec ref error=%d",
2115							   error);
2116				iput(ea_inode);
2117				ea_inode = NULL;
2118			}
2119
2120			lock_buffer(new_bh);
2121			error = ext4_journal_get_create_access(handle, new_bh);
2122			if (error) {
2123				unlock_buffer(new_bh);
2124				error = -EIO;
2125				goto getblk_failed;
2126			}
2127			memcpy(new_bh->b_data, s->base, new_bh->b_size);
2128			ext4_xattr_block_csum_set(inode, new_bh);
2129			set_buffer_uptodate(new_bh);
2130			unlock_buffer(new_bh);
2131			ext4_xattr_block_cache_insert(ea_block_cache, new_bh);
2132			error = ext4_handle_dirty_metadata(handle, inode,
2133							   new_bh);
2134			if (error)
2135				goto cleanup;
2136		}
2137	}
2138
2139	if (old_ea_inode_quota)
2140		ext4_xattr_inode_free_quota(inode, NULL, old_ea_inode_quota);
2141
2142	/* Update the inode. */
2143	EXT4_I(inode)->i_file_acl = new_bh ? new_bh->b_blocknr : 0;
2144
2145	/* Drop the previous xattr block. */
2146	if (bs->bh && bs->bh != new_bh) {
2147		struct ext4_xattr_inode_array *ea_inode_array = NULL;
2148
2149		ext4_xattr_release_block(handle, inode, bs->bh,
2150					 &ea_inode_array,
2151					 0 /* extra_credits */);
2152		ext4_xattr_inode_array_free(ea_inode_array);
2153	}
2154	error = 0;
2155
2156cleanup:
2157	if (ea_inode) {
2158		int error2;
2159
2160		error2 = ext4_xattr_inode_dec_ref(handle, ea_inode);
2161		if (error2)
2162			ext4_warning_inode(ea_inode, "dec ref error=%d",
2163					   error2);
2164
2165		/* If there was an error, revert the quota charge. */
2166		if (error)
2167			ext4_xattr_inode_free_quota(inode, ea_inode,
2168						    i_size_read(ea_inode));
2169		iput(ea_inode);
2170	}
2171	if (ce)
2172		mb_cache_entry_put(ea_block_cache, ce);
2173	brelse(new_bh);
2174	if (!(bs->bh && s->base == bs->bh->b_data))
2175		kfree(s->base);
2176
2177	return error;
2178
2179cleanup_dquot:
2180	dquot_free_block(inode, EXT4_C2B(EXT4_SB(sb), 1));
2181	goto cleanup;
2182
2183bad_block:
2184	EXT4_ERROR_INODE(inode, "bad block %llu",
2185			 EXT4_I(inode)->i_file_acl);
2186	goto cleanup;
2187
2188#undef header
2189}
2190
2191int ext4_xattr_ibody_find(struct inode *inode, struct ext4_xattr_info *i,
2192			  struct ext4_xattr_ibody_find *is)
2193{
2194	struct ext4_xattr_ibody_header *header;
2195	struct ext4_inode *raw_inode;
2196	int error;
2197
2198	if (!EXT4_INODE_HAS_XATTR_SPACE(inode))
2199		return 0;
2200
2201	raw_inode = ext4_raw_inode(&is->iloc);
2202	header = IHDR(inode, raw_inode);
2203	is->s.base = is->s.first = IFIRST(header);
2204	is->s.here = is->s.first;
2205	is->s.end = (void *)raw_inode + EXT4_SB(inode->i_sb)->s_inode_size;
2206	if (ext4_test_inode_state(inode, EXT4_STATE_XATTR)) {
2207		error = xattr_check_inode(inode, header, is->s.end);
2208		if (error)
2209			return error;
2210		/* Find the named attribute. */
2211		error = xattr_find_entry(inode, &is->s.here, is->s.end,
2212					 i->name_index, i->name, 0);
2213		if (error && error != -ENODATA)
2214			return error;
2215		is->s.not_found = error;
2216	}
2217	return 0;
2218}
2219
2220int ext4_xattr_ibody_set(handle_t *handle, struct inode *inode,
2221				struct ext4_xattr_info *i,
2222				struct ext4_xattr_ibody_find *is)
2223{
2224	struct ext4_xattr_ibody_header *header;
2225	struct ext4_xattr_search *s = &is->s;
2226	int error;
2227
2228	if (!EXT4_INODE_HAS_XATTR_SPACE(inode))
2229		return -ENOSPC;
2230
2231	error = ext4_xattr_set_entry(i, s, handle, inode, false /* is_block */);
2232	if (error)
2233		return error;
2234	header = IHDR(inode, ext4_raw_inode(&is->iloc));
2235	if (!IS_LAST_ENTRY(s->first)) {
2236		header->h_magic = cpu_to_le32(EXT4_XATTR_MAGIC);
2237		ext4_set_inode_state(inode, EXT4_STATE_XATTR);
2238	} else {
2239		header->h_magic = cpu_to_le32(0);
2240		ext4_clear_inode_state(inode, EXT4_STATE_XATTR);
2241	}
2242	return 0;
2243}
2244
2245static int ext4_xattr_value_same(struct ext4_xattr_search *s,
2246				 struct ext4_xattr_info *i)
2247{
2248	void *value;
2249
2250	/* When e_value_inum is set the value is stored externally. */
2251	if (s->here->e_value_inum)
2252		return 0;
2253	if (le32_to_cpu(s->here->e_value_size) != i->value_len)
2254		return 0;
2255	value = ((void *)s->base) + le16_to_cpu(s->here->e_value_offs);
2256	return !memcmp(value, i->value, i->value_len);
2257}
2258
2259static struct buffer_head *ext4_xattr_get_block(struct inode *inode)
2260{
2261	struct buffer_head *bh;
2262	int error;
2263
2264	if (!EXT4_I(inode)->i_file_acl)
2265		return NULL;
2266	bh = ext4_sb_bread(inode->i_sb, EXT4_I(inode)->i_file_acl, REQ_PRIO);
2267	if (IS_ERR(bh))
2268		return bh;
2269	error = ext4_xattr_check_block(inode, bh);
2270	if (error) {
2271		brelse(bh);
2272		return ERR_PTR(error);
2273	}
2274	return bh;
2275}
2276
2277/*
2278 * ext4_xattr_set_handle()
2279 *
2280 * Create, replace or remove an extended attribute for this inode.  Value
2281 * is NULL to remove an existing extended attribute, and non-NULL to
2282 * either replace an existing extended attribute, or create a new extended
2283 * attribute. The flags XATTR_REPLACE and XATTR_CREATE
2284 * specify that an extended attribute must exist and must not exist
2285 * previous to the call, respectively.
2286 *
2287 * Returns 0, or a negative error number on failure.
2288 */
2289int
2290ext4_xattr_set_handle(handle_t *handle, struct inode *inode, int name_index,
2291		      const char *name, const void *value, size_t value_len,
2292		      int flags)
2293{
2294	struct ext4_xattr_info i = {
2295		.name_index = name_index,
2296		.name = name,
2297		.value = value,
2298		.value_len = value_len,
2299		.in_inode = 0,
2300	};
2301	struct ext4_xattr_ibody_find is = {
2302		.s = { .not_found = -ENODATA, },
2303	};
2304	struct ext4_xattr_block_find bs = {
2305		.s = { .not_found = -ENODATA, },
2306	};
2307	int no_expand;
2308	int error;
2309
2310	if (!name)
2311		return -EINVAL;
2312	if (strlen(name) > 255)
2313		return -ERANGE;
2314
2315	ext4_write_lock_xattr(inode, &no_expand);
2316
2317	/* Check journal credits under write lock. */
2318	if (ext4_handle_valid(handle)) {
2319		struct buffer_head *bh;
2320		int credits;
2321
2322		bh = ext4_xattr_get_block(inode);
2323		if (IS_ERR(bh)) {
2324			error = PTR_ERR(bh);
2325			goto cleanup;
2326		}
2327
2328		credits = __ext4_xattr_set_credits(inode->i_sb, inode, bh,
2329						   value_len,
2330						   flags & XATTR_CREATE);
2331		brelse(bh);
2332
2333		if (jbd2_handle_buffer_credits(handle) < credits) {
2334			error = -ENOSPC;
2335			goto cleanup;
2336		}
2337		WARN_ON_ONCE(!(current->flags & PF_MEMALLOC_NOFS));
2338	}
2339
2340	error = ext4_reserve_inode_write(handle, inode, &is.iloc);
2341	if (error)
2342		goto cleanup;
2343
2344	if (ext4_test_inode_state(inode, EXT4_STATE_NEW)) {
2345		struct ext4_inode *raw_inode = ext4_raw_inode(&is.iloc);
2346		memset(raw_inode, 0, EXT4_SB(inode->i_sb)->s_inode_size);
2347		ext4_clear_inode_state(inode, EXT4_STATE_NEW);
2348	}
2349
2350	error = ext4_xattr_ibody_find(inode, &i, &is);
2351	if (error)
2352		goto cleanup;
2353	if (is.s.not_found)
2354		error = ext4_xattr_block_find(inode, &i, &bs);
2355	if (error)
2356		goto cleanup;
2357	if (is.s.not_found && bs.s.not_found) {
2358		error = -ENODATA;
2359		if (flags & XATTR_REPLACE)
2360			goto cleanup;
2361		error = 0;
2362		if (!value)
2363			goto cleanup;
2364	} else {
2365		error = -EEXIST;
2366		if (flags & XATTR_CREATE)
2367			goto cleanup;
2368	}
2369
2370	if (!value) {
2371		if (!is.s.not_found)
2372			error = ext4_xattr_ibody_set(handle, inode, &i, &is);
2373		else if (!bs.s.not_found)
2374			error = ext4_xattr_block_set(handle, inode, &i, &bs);
2375	} else {
2376		error = 0;
2377		/* Xattr value did not change? Save us some work and bail out */
2378		if (!is.s.not_found && ext4_xattr_value_same(&is.s, &i))
2379			goto cleanup;
2380		if (!bs.s.not_found && ext4_xattr_value_same(&bs.s, &i))
2381			goto cleanup;
2382
2383		if (ext4_has_feature_ea_inode(inode->i_sb) &&
2384		    (EXT4_XATTR_SIZE(i.value_len) >
2385			EXT4_XATTR_MIN_LARGE_EA_SIZE(inode->i_sb->s_blocksize)))
2386			i.in_inode = 1;
2387retry_inode:
2388		error = ext4_xattr_ibody_set(handle, inode, &i, &is);
2389		if (!error && !bs.s.not_found) {
2390			i.value = NULL;
2391			error = ext4_xattr_block_set(handle, inode, &i, &bs);
2392		} else if (error == -ENOSPC) {
2393			if (EXT4_I(inode)->i_file_acl && !bs.s.base) {
2394				brelse(bs.bh);
2395				bs.bh = NULL;
2396				error = ext4_xattr_block_find(inode, &i, &bs);
2397				if (error)
2398					goto cleanup;
2399			}
2400			error = ext4_xattr_block_set(handle, inode, &i, &bs);
2401			if (!error && !is.s.not_found) {
2402				i.value = NULL;
2403				error = ext4_xattr_ibody_set(handle, inode, &i,
2404							     &is);
2405			} else if (error == -ENOSPC) {
2406				/*
2407				 * Xattr does not fit in the block, store at
2408				 * external inode if possible.
2409				 */
2410				if (ext4_has_feature_ea_inode(inode->i_sb) &&
2411				    i.value_len && !i.in_inode) {
2412					i.in_inode = 1;
2413					goto retry_inode;
2414				}
2415			}
2416		}
2417	}
2418	if (!error) {
2419		ext4_xattr_update_super_block(handle, inode->i_sb);
2420		inode->i_ctime = current_time(inode);
2421		if (!value)
2422			no_expand = 0;
2423		error = ext4_mark_iloc_dirty(handle, inode, &is.iloc);
2424		/*
2425		 * The bh is consumed by ext4_mark_iloc_dirty, even with
2426		 * error != 0.
2427		 */
2428		is.iloc.bh = NULL;
2429		if (IS_SYNC(inode))
2430			ext4_handle_sync(handle);
2431	}
2432	ext4_fc_mark_ineligible(inode->i_sb, EXT4_FC_REASON_XATTR);
2433
2434cleanup:
2435	brelse(is.iloc.bh);
2436	brelse(bs.bh);
2437	ext4_write_unlock_xattr(inode, &no_expand);
2438	return error;
2439}
2440
2441int ext4_xattr_set_credits(struct inode *inode, size_t value_len,
2442			   bool is_create, int *credits)
2443{
2444	struct buffer_head *bh;
2445	int err;
2446
2447	*credits = 0;
2448
2449	if (!EXT4_SB(inode->i_sb)->s_journal)
2450		return 0;
2451
2452	down_read(&EXT4_I(inode)->xattr_sem);
2453
2454	bh = ext4_xattr_get_block(inode);
2455	if (IS_ERR(bh)) {
2456		err = PTR_ERR(bh);
2457	} else {
2458		*credits = __ext4_xattr_set_credits(inode->i_sb, inode, bh,
2459						    value_len, is_create);
2460		brelse(bh);
2461		err = 0;
2462	}
2463
2464	up_read(&EXT4_I(inode)->xattr_sem);
2465	return err;
2466}
2467
2468/*
2469 * ext4_xattr_set()
2470 *
2471 * Like ext4_xattr_set_handle, but start from an inode. This extended
2472 * attribute modification is a filesystem transaction by itself.
2473 *
2474 * Returns 0, or a negative error number on failure.
2475 */
2476int
2477ext4_xattr_set(struct inode *inode, int name_index, const char *name,
2478	       const void *value, size_t value_len, int flags)
2479{
2480	handle_t *handle;
2481	struct super_block *sb = inode->i_sb;
2482	int error, retries = 0;
2483	int credits;
2484
2485	error = dquot_initialize(inode);
2486	if (error)
2487		return error;
2488
2489retry:
2490	error = ext4_xattr_set_credits(inode, value_len, flags & XATTR_CREATE,
2491				       &credits);
2492	if (error)
2493		return error;
2494
2495	handle = ext4_journal_start(inode, EXT4_HT_XATTR, credits);
2496	if (IS_ERR(handle)) {
2497		error = PTR_ERR(handle);
2498	} else {
2499		int error2;
2500
2501		error = ext4_xattr_set_handle(handle, inode, name_index, name,
2502					      value, value_len, flags);
2503		error2 = ext4_journal_stop(handle);
2504		if (error == -ENOSPC &&
2505		    ext4_should_retry_alloc(sb, &retries))
2506			goto retry;
2507		if (error == 0)
2508			error = error2;
2509	}
2510	ext4_fc_mark_ineligible(inode->i_sb, EXT4_FC_REASON_XATTR);
2511
2512	return error;
2513}
2514
2515/*
2516 * Shift the EA entries in the inode to create space for the increased
2517 * i_extra_isize.
2518 */
2519static void ext4_xattr_shift_entries(struct ext4_xattr_entry *entry,
2520				     int value_offs_shift, void *to,
2521				     void *from, size_t n)
2522{
2523	struct ext4_xattr_entry *last = entry;
2524	int new_offs;
2525
2526	/* We always shift xattr headers further thus offsets get lower */
2527	BUG_ON(value_offs_shift > 0);
2528
2529	/* Adjust the value offsets of the entries */
2530	for (; !IS_LAST_ENTRY(last); last = EXT4_XATTR_NEXT(last)) {
2531		if (!last->e_value_inum && last->e_value_size) {
2532			new_offs = le16_to_cpu(last->e_value_offs) +
2533							value_offs_shift;
2534			last->e_value_offs = cpu_to_le16(new_offs);
2535		}
2536	}
2537	/* Shift the entries by n bytes */
2538	memmove(to, from, n);
2539}
2540
2541/*
2542 * Move xattr pointed to by 'entry' from inode into external xattr block
2543 */
2544static int ext4_xattr_move_to_block(handle_t *handle, struct inode *inode,
2545				    struct ext4_inode *raw_inode,
2546				    struct ext4_xattr_entry *entry)
2547{
2548	struct ext4_xattr_ibody_find *is = NULL;
2549	struct ext4_xattr_block_find *bs = NULL;
2550	char *buffer = NULL, *b_entry_name = NULL;
2551	size_t value_size = le32_to_cpu(entry->e_value_size);
2552	struct ext4_xattr_info i = {
2553		.value = NULL,
2554		.value_len = 0,
2555		.name_index = entry->e_name_index,
2556		.in_inode = !!entry->e_value_inum,
2557	};
2558	struct ext4_xattr_ibody_header *header = IHDR(inode, raw_inode);
2559	int needs_kvfree = 0;
2560	int error;
2561
2562	is = kzalloc(sizeof(struct ext4_xattr_ibody_find), GFP_NOFS);
2563	bs = kzalloc(sizeof(struct ext4_xattr_block_find), GFP_NOFS);
2564	b_entry_name = kmalloc(entry->e_name_len + 1, GFP_NOFS);
2565	if (!is || !bs || !b_entry_name) {
2566		error = -ENOMEM;
2567		goto out;
2568	}
2569
2570	is->s.not_found = -ENODATA;
2571	bs->s.not_found = -ENODATA;
2572	is->iloc.bh = NULL;
2573	bs->bh = NULL;
2574
2575	/* Save the entry name and the entry value */
2576	if (entry->e_value_inum) {
2577		buffer = kvmalloc(value_size, GFP_NOFS);
2578		if (!buffer) {
2579			error = -ENOMEM;
2580			goto out;
2581		}
2582		needs_kvfree = 1;
2583		error = ext4_xattr_inode_get(inode, entry, buffer, value_size);
2584		if (error)
2585			goto out;
2586	} else {
2587		size_t value_offs = le16_to_cpu(entry->e_value_offs);
2588		buffer = (void *)IFIRST(header) + value_offs;
2589	}
2590
2591	memcpy(b_entry_name, entry->e_name, entry->e_name_len);
2592	b_entry_name[entry->e_name_len] = '\0';
2593	i.name = b_entry_name;
2594
2595	error = ext4_get_inode_loc(inode, &is->iloc);
2596	if (error)
2597		goto out;
2598
2599	error = ext4_xattr_ibody_find(inode, &i, is);
2600	if (error)
2601		goto out;
2602
2603	i.value = buffer;
2604	i.value_len = value_size;
2605	error = ext4_xattr_block_find(inode, &i, bs);
2606	if (error)
2607		goto out;
2608
2609	/* Move ea entry from the inode into the block */
2610	error = ext4_xattr_block_set(handle, inode, &i, bs);
2611	if (error)
2612		goto out;
2613
2614	/* Remove the chosen entry from the inode */
2615	i.value = NULL;
2616	i.value_len = 0;
2617	error = ext4_xattr_ibody_set(handle, inode, &i, is);
2618
2619out:
2620	kfree(b_entry_name);
2621	if (needs_kvfree && buffer)
2622		kvfree(buffer);
2623	if (is)
2624		brelse(is->iloc.bh);
2625	if (bs)
2626		brelse(bs->bh);
2627	kfree(is);
2628	kfree(bs);
2629
2630	return error;
2631}
2632
2633static int ext4_xattr_make_inode_space(handle_t *handle, struct inode *inode,
2634				       struct ext4_inode *raw_inode,
2635				       int isize_diff, size_t ifree,
2636				       size_t bfree, int *total_ino)
2637{
2638	struct ext4_xattr_ibody_header *header = IHDR(inode, raw_inode);
2639	struct ext4_xattr_entry *small_entry;
2640	struct ext4_xattr_entry *entry;
2641	struct ext4_xattr_entry *last;
2642	unsigned int entry_size;	/* EA entry size */
2643	unsigned int total_size;	/* EA entry size + value size */
2644	unsigned int min_total_size;
2645	int error;
2646
2647	while (isize_diff > ifree) {
2648		entry = NULL;
2649		small_entry = NULL;
2650		min_total_size = ~0U;
2651		last = IFIRST(header);
2652		/* Find the entry best suited to be pushed into EA block */
2653		for (; !IS_LAST_ENTRY(last); last = EXT4_XATTR_NEXT(last)) {
2654			/* never move system.data out of the inode */
2655			if ((last->e_name_len == 4) &&
2656			    (last->e_name_index == EXT4_XATTR_INDEX_SYSTEM) &&
2657			    !memcmp(last->e_name, "data", 4))
2658				continue;
2659			total_size = EXT4_XATTR_LEN(last->e_name_len);
2660			if (!last->e_value_inum)
2661				total_size += EXT4_XATTR_SIZE(
2662					       le32_to_cpu(last->e_value_size));
2663			if (total_size <= bfree &&
2664			    total_size < min_total_size) {
2665				if (total_size + ifree < isize_diff) {
2666					small_entry = last;
2667				} else {
2668					entry = last;
2669					min_total_size = total_size;
2670				}
2671			}
2672		}
2673
2674		if (entry == NULL) {
2675			if (small_entry == NULL)
2676				return -ENOSPC;
2677			entry = small_entry;
2678		}
2679
2680		entry_size = EXT4_XATTR_LEN(entry->e_name_len);
2681		total_size = entry_size;
2682		if (!entry->e_value_inum)
2683			total_size += EXT4_XATTR_SIZE(
2684					      le32_to_cpu(entry->e_value_size));
2685		error = ext4_xattr_move_to_block(handle, inode, raw_inode,
2686						 entry);
2687		if (error)
2688			return error;
2689
2690		*total_ino -= entry_size;
2691		ifree += total_size;
2692		bfree -= total_size;
2693	}
2694
2695	return 0;
2696}
2697
2698/*
2699 * Expand an inode by new_extra_isize bytes when EAs are present.
2700 * Returns 0 on success or negative error number on failure.
2701 */
2702int ext4_expand_extra_isize_ea(struct inode *inode, int new_extra_isize,
2703			       struct ext4_inode *raw_inode, handle_t *handle)
2704{
2705	struct ext4_xattr_ibody_header *header;
2706	struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb);
2707	static unsigned int mnt_count;
2708	size_t min_offs;
2709	size_t ifree, bfree;
2710	int total_ino;
2711	void *base, *end;
2712	int error = 0, tried_min_extra_isize = 0;
2713	int s_min_extra_isize = le16_to_cpu(sbi->s_es->s_min_extra_isize);
2714	int isize_diff;	/* How much do we need to grow i_extra_isize */
2715
2716retry:
2717	isize_diff = new_extra_isize - EXT4_I(inode)->i_extra_isize;
2718	if (EXT4_I(inode)->i_extra_isize >= new_extra_isize)
2719		return 0;
2720
2721	header = IHDR(inode, raw_inode);
2722
2723	/*
2724	 * Check if enough free space is available in the inode to shift the
2725	 * entries ahead by new_extra_isize.
2726	 */
2727
2728	base = IFIRST(header);
2729	end = (void *)raw_inode + EXT4_SB(inode->i_sb)->s_inode_size;
2730	min_offs = end - base;
2731	total_ino = sizeof(struct ext4_xattr_ibody_header) + sizeof(u32);
2732
2733	error = xattr_check_inode(inode, header, end);
2734	if (error)
2735		goto cleanup;
2736
2737	ifree = ext4_xattr_free_space(base, &min_offs, base, &total_ino);
2738	if (ifree >= isize_diff)
2739		goto shift;
2740
2741	/*
2742	 * Enough free space isn't available in the inode, check if
2743	 * EA block can hold new_extra_isize bytes.
2744	 */
2745	if (EXT4_I(inode)->i_file_acl) {
2746		struct buffer_head *bh;
2747
2748		bh = ext4_sb_bread(inode->i_sb, EXT4_I(inode)->i_file_acl, REQ_PRIO);
2749		if (IS_ERR(bh)) {
2750			error = PTR_ERR(bh);
2751			goto cleanup;
2752		}
2753		error = ext4_xattr_check_block(inode, bh);
2754		if (error) {
2755			brelse(bh);
2756			goto cleanup;
2757		}
2758		base = BHDR(bh);
2759		end = bh->b_data + bh->b_size;
2760		min_offs = end - base;
2761		bfree = ext4_xattr_free_space(BFIRST(bh), &min_offs, base,
2762					      NULL);
2763		brelse(bh);
2764		if (bfree + ifree < isize_diff) {
2765			if (!tried_min_extra_isize && s_min_extra_isize) {
2766				tried_min_extra_isize++;
2767				new_extra_isize = s_min_extra_isize;
2768				goto retry;
2769			}
2770			error = -ENOSPC;
2771			goto cleanup;
2772		}
2773	} else {
2774		bfree = inode->i_sb->s_blocksize;
2775	}
2776
2777	error = ext4_xattr_make_inode_space(handle, inode, raw_inode,
2778					    isize_diff, ifree, bfree,
2779					    &total_ino);
2780	if (error) {
2781		if (error == -ENOSPC && !tried_min_extra_isize &&
2782		    s_min_extra_isize) {
2783			tried_min_extra_isize++;
2784			new_extra_isize = s_min_extra_isize;
2785			goto retry;
2786		}
2787		goto cleanup;
2788	}
2789shift:
2790	/* Adjust the offsets and shift the remaining entries ahead */
2791	ext4_xattr_shift_entries(IFIRST(header), EXT4_I(inode)->i_extra_isize
2792			- new_extra_isize, (void *)raw_inode +
2793			EXT4_GOOD_OLD_INODE_SIZE + new_extra_isize,
2794			(void *)header, total_ino);
2795	EXT4_I(inode)->i_extra_isize = new_extra_isize;
2796
2797	if (ext4_has_inline_data(inode))
2798		error = ext4_find_inline_data_nolock(inode);
2799
2800cleanup:
2801	if (error && (mnt_count != le16_to_cpu(sbi->s_es->s_mnt_count))) {
2802		ext4_warning(inode->i_sb, "Unable to expand inode %lu. Delete some EAs or run e2fsck.",
2803			     inode->i_ino);
2804		mnt_count = le16_to_cpu(sbi->s_es->s_mnt_count);
2805	}
2806	return error;
2807}
2808
2809#define EIA_INCR 16 /* must be 2^n */
2810#define EIA_MASK (EIA_INCR - 1)
2811
2812/* Add the large xattr @inode into @ea_inode_array for deferred iput().
2813 * If @ea_inode_array is new or full it will be grown and the old
2814 * contents copied over.
2815 */
2816static int
2817ext4_expand_inode_array(struct ext4_xattr_inode_array **ea_inode_array,
2818			struct inode *inode)
2819{
2820	if (*ea_inode_array == NULL) {
2821		/*
2822		 * Start with 15 inodes, so it fits into a power-of-two size.
2823		 * If *ea_inode_array is NULL, this is essentially offsetof()
2824		 */
2825		(*ea_inode_array) =
2826			kmalloc(offsetof(struct ext4_xattr_inode_array,
2827					 inodes[EIA_MASK]),
2828				GFP_NOFS);
2829		if (*ea_inode_array == NULL)
2830			return -ENOMEM;
2831		(*ea_inode_array)->count = 0;
2832	} else if (((*ea_inode_array)->count & EIA_MASK) == EIA_MASK) {
2833		/* expand the array once all 15 + n * 16 slots are full */
2834		struct ext4_xattr_inode_array *new_array = NULL;
2835		int count = (*ea_inode_array)->count;
2836
2837		/* if new_array is NULL, this is essentially offsetof() */
2838		new_array = kmalloc(
2839				offsetof(struct ext4_xattr_inode_array,
2840					 inodes[count + EIA_INCR]),
2841				GFP_NOFS);
2842		if (new_array == NULL)
2843			return -ENOMEM;
2844		memcpy(new_array, *ea_inode_array,
2845		       offsetof(struct ext4_xattr_inode_array, inodes[count]));
2846		kfree(*ea_inode_array);
2847		*ea_inode_array = new_array;
2848	}
2849	(*ea_inode_array)->inodes[(*ea_inode_array)->count++] = inode;
2850	return 0;
2851}
2852
2853/*
2854 * ext4_xattr_delete_inode()
2855 *
2856 * Free extended attribute resources associated with this inode. Traverse
2857 * all entries and decrement reference on any xattr inodes associated with this
2858 * inode. This is called immediately before an inode is freed. We have exclusive
2859 * access to the inode. If an orphan inode is deleted it will also release its
2860 * references on xattr block and xattr inodes.
2861 */
2862int ext4_xattr_delete_inode(handle_t *handle, struct inode *inode,
2863			    struct ext4_xattr_inode_array **ea_inode_array,
2864			    int extra_credits)
2865{
2866	struct buffer_head *bh = NULL;
2867	struct ext4_xattr_ibody_header *header;
2868	struct ext4_iloc iloc = { .bh = NULL };
2869	struct ext4_xattr_entry *entry;
2870	struct inode *ea_inode;
2871	int error;
2872
2873	error = ext4_journal_ensure_credits(handle, extra_credits,
2874			ext4_free_metadata_revoke_credits(inode->i_sb, 1));
2875	if (error < 0) {
2876		EXT4_ERROR_INODE(inode, "ensure credits (error %d)", error);
2877		goto cleanup;
2878	}
2879
2880	if (ext4_has_feature_ea_inode(inode->i_sb) &&
2881	    ext4_test_inode_state(inode, EXT4_STATE_XATTR)) {
2882
2883		error = ext4_get_inode_loc(inode, &iloc);
2884		if (error) {
2885			EXT4_ERROR_INODE(inode, "inode loc (error %d)", error);
2886			goto cleanup;
2887		}
2888
2889		error = ext4_journal_get_write_access(handle, iloc.bh);
2890		if (error) {
2891			EXT4_ERROR_INODE(inode, "write access (error %d)",
2892					 error);
2893			goto cleanup;
2894		}
2895
2896		header = IHDR(inode, ext4_raw_inode(&iloc));
2897		if (header->h_magic == cpu_to_le32(EXT4_XATTR_MAGIC))
2898			ext4_xattr_inode_dec_ref_all(handle, inode, iloc.bh,
2899						     IFIRST(header),
2900						     false /* block_csum */,
2901						     ea_inode_array,
2902						     extra_credits,
2903						     false /* skip_quota */);
2904	}
2905
2906	if (EXT4_I(inode)->i_file_acl) {
2907		bh = ext4_sb_bread(inode->i_sb, EXT4_I(inode)->i_file_acl, REQ_PRIO);
2908		if (IS_ERR(bh)) {
2909			error = PTR_ERR(bh);
2910			if (error == -EIO) {
2911				EXT4_ERROR_INODE_ERR(inode, EIO,
2912						     "block %llu read error",
2913						     EXT4_I(inode)->i_file_acl);
2914			}
2915			bh = NULL;
2916			goto cleanup;
2917		}
2918		error = ext4_xattr_check_block(inode, bh);
2919		if (error)
2920			goto cleanup;
2921
2922		if (ext4_has_feature_ea_inode(inode->i_sb)) {
2923			for (entry = BFIRST(bh); !IS_LAST_ENTRY(entry);
2924			     entry = EXT4_XATTR_NEXT(entry)) {
2925				if (!entry->e_value_inum)
2926					continue;
2927				error = ext4_xattr_inode_iget(inode,
2928					      le32_to_cpu(entry->e_value_inum),
2929					      le32_to_cpu(entry->e_hash),
2930					      &ea_inode);
2931				if (error)
2932					continue;
2933				ext4_xattr_inode_free_quota(inode, ea_inode,
2934					      le32_to_cpu(entry->e_value_size));
2935				iput(ea_inode);
2936			}
2937
2938		}
2939
2940		ext4_xattr_release_block(handle, inode, bh, ea_inode_array,
2941					 extra_credits);
2942		/*
2943		 * Update i_file_acl value in the same transaction that releases
2944		 * block.
2945		 */
2946		EXT4_I(inode)->i_file_acl = 0;
2947		error = ext4_mark_inode_dirty(handle, inode);
2948		if (error) {
2949			EXT4_ERROR_INODE(inode, "mark inode dirty (error %d)",
2950					 error);
2951			goto cleanup;
2952		}
2953		ext4_fc_mark_ineligible(inode->i_sb, EXT4_FC_REASON_XATTR);
2954	}
2955	error = 0;
2956cleanup:
2957	brelse(iloc.bh);
2958	brelse(bh);
2959	return error;
2960}
2961
2962void ext4_xattr_inode_array_free(struct ext4_xattr_inode_array *ea_inode_array)
2963{
2964	int idx;
2965
2966	if (ea_inode_array == NULL)
2967		return;
2968
2969	for (idx = 0; idx < ea_inode_array->count; ++idx)
2970		iput(ea_inode_array->inodes[idx]);
2971	kfree(ea_inode_array);
2972}
2973
2974/*
2975 * ext4_xattr_block_cache_insert()
2976 *
2977 * Create a new entry in the extended attribute block cache, and insert
2978 * it unless such an entry is already in the cache.
2979 *
2980 * Returns 0, or a negative error number on failure.
2981 */
2982static void
2983ext4_xattr_block_cache_insert(struct mb_cache *ea_block_cache,
2984			      struct buffer_head *bh)
2985{
2986	struct ext4_xattr_header *header = BHDR(bh);
2987	__u32 hash = le32_to_cpu(header->h_hash);
2988	int reusable = le32_to_cpu(header->h_refcount) <
2989		       EXT4_XATTR_REFCOUNT_MAX;
2990	int error;
2991
2992	if (!ea_block_cache)
2993		return;
2994	error = mb_cache_entry_create(ea_block_cache, GFP_NOFS, hash,
2995				      bh->b_blocknr, reusable);
2996	if (error) {
2997		if (error == -EBUSY)
2998			ea_bdebug(bh, "already in cache");
2999	} else
3000		ea_bdebug(bh, "inserting [%x]", (int)hash);
3001}
3002
3003/*
3004 * ext4_xattr_cmp()
3005 *
3006 * Compare two extended attribute blocks for equality.
3007 *
3008 * Returns 0 if the blocks are equal, 1 if they differ, and
3009 * a negative error number on errors.
3010 */
3011static int
3012ext4_xattr_cmp(struct ext4_xattr_header *header1,
3013	       struct ext4_xattr_header *header2)
3014{
3015	struct ext4_xattr_entry *entry1, *entry2;
3016
3017	entry1 = ENTRY(header1+1);
3018	entry2 = ENTRY(header2+1);
3019	while (!IS_LAST_ENTRY(entry1)) {
3020		if (IS_LAST_ENTRY(entry2))
3021			return 1;
3022		if (entry1->e_hash != entry2->e_hash ||
3023		    entry1->e_name_index != entry2->e_name_index ||
3024		    entry1->e_name_len != entry2->e_name_len ||
3025		    entry1->e_value_size != entry2->e_value_size ||
3026		    entry1->e_value_inum != entry2->e_value_inum ||
3027		    memcmp(entry1->e_name, entry2->e_name, entry1->e_name_len))
3028			return 1;
3029		if (!entry1->e_value_inum &&
3030		    memcmp((char *)header1 + le16_to_cpu(entry1->e_value_offs),
3031			   (char *)header2 + le16_to_cpu(entry2->e_value_offs),
3032			   le32_to_cpu(entry1->e_value_size)))
3033			return 1;
3034
3035		entry1 = EXT4_XATTR_NEXT(entry1);
3036		entry2 = EXT4_XATTR_NEXT(entry2);
3037	}
3038	if (!IS_LAST_ENTRY(entry2))
3039		return 1;
3040	return 0;
3041}
3042
3043/*
3044 * ext4_xattr_block_cache_find()
3045 *
3046 * Find an identical extended attribute block.
3047 *
3048 * Returns a pointer to the block found, or NULL if such a block was
3049 * not found or an error occurred.
3050 */
3051static struct buffer_head *
3052ext4_xattr_block_cache_find(struct inode *inode,
3053			    struct ext4_xattr_header *header,
3054			    struct mb_cache_entry **pce)
3055{
3056	__u32 hash = le32_to_cpu(header->h_hash);
3057	struct mb_cache_entry *ce;
3058	struct mb_cache *ea_block_cache = EA_BLOCK_CACHE(inode);
3059
3060	if (!ea_block_cache)
3061		return NULL;
3062	if (!header->h_hash)
3063		return NULL;  /* never share */
3064	ea_idebug(inode, "looking for cached blocks [%x]", (int)hash);
3065	ce = mb_cache_entry_find_first(ea_block_cache, hash);
3066	while (ce) {
3067		struct buffer_head *bh;
3068
3069		bh = ext4_sb_bread(inode->i_sb, ce->e_value, REQ_PRIO);
3070		if (IS_ERR(bh)) {
3071			if (PTR_ERR(bh) == -ENOMEM) {
3072				mb_cache_entry_put(ea_block_cache, ce);
3073				return NULL;
3074			}
3075			bh = NULL;
3076			EXT4_ERROR_INODE(inode, "block %lu read error",
3077					 (unsigned long)ce->e_value);
3078		} else if (ext4_xattr_cmp(header, BHDR(bh)) == 0) {
3079			*pce = ce;
3080			return bh;
3081		}
3082		brelse(bh);
3083		ce = mb_cache_entry_find_next(ea_block_cache, ce);
3084	}
3085	return NULL;
3086}
3087
3088#define NAME_HASH_SHIFT 5
3089#define VALUE_HASH_SHIFT 16
3090
3091/*
3092 * ext4_xattr_hash_entry()
3093 *
3094 * Compute the hash of an extended attribute.
3095 */
3096static __le32 ext4_xattr_hash_entry(char *name, size_t name_len, __le32 *value,
3097				    size_t value_count)
3098{
3099	__u32 hash = 0;
3100
3101	while (name_len--) {
3102		hash = (hash << NAME_HASH_SHIFT) ^
3103		       (hash >> (8*sizeof(hash) - NAME_HASH_SHIFT)) ^
3104		       *name++;
3105	}
3106	while (value_count--) {
3107		hash = (hash << VALUE_HASH_SHIFT) ^
3108		       (hash >> (8*sizeof(hash) - VALUE_HASH_SHIFT)) ^
3109		       le32_to_cpu(*value++);
3110	}
3111	return cpu_to_le32(hash);
3112}
3113
3114#undef NAME_HASH_SHIFT
3115#undef VALUE_HASH_SHIFT
3116
3117#define BLOCK_HASH_SHIFT 16
3118
3119/*
3120 * ext4_xattr_rehash()
3121 *
3122 * Re-compute the extended attribute hash value after an entry has changed.
3123 */
3124static void ext4_xattr_rehash(struct ext4_xattr_header *header)
3125{
3126	struct ext4_xattr_entry *here;
3127	__u32 hash = 0;
3128
3129	here = ENTRY(header+1);
3130	while (!IS_LAST_ENTRY(here)) {
3131		if (!here->e_hash) {
3132			/* Block is not shared if an entry's hash value == 0 */
3133			hash = 0;
3134			break;
3135		}
3136		hash = (hash << BLOCK_HASH_SHIFT) ^
3137		       (hash >> (8*sizeof(hash) - BLOCK_HASH_SHIFT)) ^
3138		       le32_to_cpu(here->e_hash);
3139		here = EXT4_XATTR_NEXT(here);
3140	}
3141	header->h_hash = cpu_to_le32(hash);
3142}
3143
3144#undef BLOCK_HASH_SHIFT
3145
#define	HASH_BUCKET_BITS	10

/*
 * Create the cache used for xattr block sharing; the result of
 * mb_cache_create(HASH_BUCKET_BITS) is handed back unchanged.
 */
struct mb_cache *
ext4_xattr_create_cache(void)
{
	struct mb_cache *cache;

	cache = mb_cache_create(HASH_BUCKET_BITS);
	return cache;
}
3153
/* Destroy @cache with mb_cache_destroy(); a NULL @cache is ignored. */
void ext4_xattr_destroy_cache(struct mb_cache *cache)
{
	if (!cache)
		return;
	mb_cache_destroy(cache);
}
3159
3160