xref: /kernel/linux/linux-6.6/fs/nilfs2/inode.c (revision 62306a36)
1// SPDX-License-Identifier: GPL-2.0+
2/*
3 * NILFS inode operations.
4 *
5 * Copyright (C) 2005-2008 Nippon Telegraph and Telephone Corporation.
6 *
7 * Written by Ryusuke Konishi.
8 *
9 */
10
11#include <linux/buffer_head.h>
12#include <linux/gfp.h>
13#include <linux/mpage.h>
14#include <linux/pagemap.h>
15#include <linux/writeback.h>
16#include <linux/uio.h>
17#include <linux/fiemap.h>
18#include "nilfs.h"
19#include "btnode.h"
20#include "segment.h"
21#include "page.h"
22#include "mdt.h"
23#include "cpfile.h"
24#include "ifile.h"
25
26/**
27 * struct nilfs_iget_args - arguments used during comparison between inodes
28 * @ino: inode number
29 * @cno: checkpoint number
30 * @root: pointer on NILFS root object (mounted checkpoint)
31 * @for_gc: inode for GC flag
32 * @for_btnc: inode for B-tree node cache flag
33 * @for_shadow: inode for shadowed page cache flag
34 */
35struct nilfs_iget_args {
36	u64 ino;
37	__u64 cno;
38	struct nilfs_root *root;
39	bool for_gc;
40	bool for_btnc;
41	bool for_shadow;
42};
43
44static int nilfs_iget_test(struct inode *inode, void *opaque);
45
46void nilfs_inode_add_blocks(struct inode *inode, int n)
47{
48	struct nilfs_root *root = NILFS_I(inode)->i_root;
49
50	inode_add_bytes(inode, i_blocksize(inode) * n);
51	if (root)
52		atomic64_add(n, &root->blocks_count);
53}
54
55void nilfs_inode_sub_blocks(struct inode *inode, int n)
56{
57	struct nilfs_root *root = NILFS_I(inode)->i_root;
58
59	inode_sub_bytes(inode, i_blocksize(inode) * n);
60	if (root)
61		atomic64_sub(n, &root->blocks_count);
62}
63
64/**
65 * nilfs_get_block() - get a file block on the filesystem (callback function)
66 * @inode: inode struct of the target file
67 * @blkoff: file block number
68 * @bh_result: buffer head to be mapped on
69 * @create: indicate whether allocating the block or not when it has not
70 *      been allocated yet.
71 *
72 * This function does not issue actual read request of the specified data
73 * block. It is done by VFS.
74 */
75int nilfs_get_block(struct inode *inode, sector_t blkoff,
76		    struct buffer_head *bh_result, int create)
77{
78	struct nilfs_inode_info *ii = NILFS_I(inode);
79	struct the_nilfs *nilfs = inode->i_sb->s_fs_info;
80	__u64 blknum = 0;
81	int err = 0, ret;
82	unsigned int maxblocks = bh_result->b_size >> inode->i_blkbits;
83
84	down_read(&NILFS_MDT(nilfs->ns_dat)->mi_sem);
85	ret = nilfs_bmap_lookup_contig(ii->i_bmap, blkoff, &blknum, maxblocks);
86	up_read(&NILFS_MDT(nilfs->ns_dat)->mi_sem);
87	if (ret >= 0) {	/* found */
88		map_bh(bh_result, inode->i_sb, blknum);
89		if (ret > 0)
90			bh_result->b_size = (ret << inode->i_blkbits);
91		goto out;
92	}
93	/* data block was not found */
94	if (ret == -ENOENT && create) {
95		struct nilfs_transaction_info ti;
96
97		bh_result->b_blocknr = 0;
98		err = nilfs_transaction_begin(inode->i_sb, &ti, 1);
99		if (unlikely(err))
100			goto out;
101		err = nilfs_bmap_insert(ii->i_bmap, blkoff,
102					(unsigned long)bh_result);
103		if (unlikely(err != 0)) {
104			if (err == -EEXIST) {
105				/*
106				 * The get_block() function could be called
107				 * from multiple callers for an inode.
108				 * However, the page having this block must
109				 * be locked in this case.
110				 */
111				nilfs_warn(inode->i_sb,
112					   "%s (ino=%lu): a race condition while inserting a data block at offset=%llu",
113					   __func__, inode->i_ino,
114					   (unsigned long long)blkoff);
115				err = 0;
116			}
117			nilfs_transaction_abort(inode->i_sb);
118			goto out;
119		}
120		nilfs_mark_inode_dirty_sync(inode);
121		nilfs_transaction_commit(inode->i_sb); /* never fails */
122		/* Error handling should be detailed */
123		set_buffer_new(bh_result);
124		set_buffer_delay(bh_result);
125		map_bh(bh_result, inode->i_sb, 0);
126		/* Disk block number must be changed to proper value */
127
128	} else if (ret == -ENOENT) {
129		/*
130		 * not found is not error (e.g. hole); must return without
131		 * the mapped state flag.
132		 */
133		;
134	} else {
135		err = ret;
136	}
137
138 out:
139	return err;
140}
141
142/**
143 * nilfs_read_folio() - implement read_folio() method of nilfs_aops {}
144 * address_space_operations.
145 * @file: file struct of the file to be read
146 * @folio: the folio to be read
147 */
148static int nilfs_read_folio(struct file *file, struct folio *folio)
149{
150	return mpage_read_folio(folio, nilfs_get_block);
151}
152
153static void nilfs_readahead(struct readahead_control *rac)
154{
155	mpage_readahead(rac, nilfs_get_block);
156}
157
158static int nilfs_writepages(struct address_space *mapping,
159			    struct writeback_control *wbc)
160{
161	struct inode *inode = mapping->host;
162	int err = 0;
163
164	if (sb_rdonly(inode->i_sb)) {
165		nilfs_clear_dirty_pages(mapping, false);
166		return -EROFS;
167	}
168
169	if (wbc->sync_mode == WB_SYNC_ALL)
170		err = nilfs_construct_dsync_segment(inode->i_sb, inode,
171						    wbc->range_start,
172						    wbc->range_end);
173	return err;
174}
175
176static int nilfs_writepage(struct page *page, struct writeback_control *wbc)
177{
178	struct inode *inode = page->mapping->host;
179	int err;
180
181	if (sb_rdonly(inode->i_sb)) {
182		/*
183		 * It means that filesystem was remounted in read-only
184		 * mode because of error or metadata corruption. But we
185		 * have dirty pages that try to be flushed in background.
186		 * So, here we simply discard this dirty page.
187		 */
188		nilfs_clear_dirty_page(page, false);
189		unlock_page(page);
190		return -EROFS;
191	}
192
193	redirty_page_for_writepage(wbc, page);
194	unlock_page(page);
195
196	if (wbc->sync_mode == WB_SYNC_ALL) {
197		err = nilfs_construct_segment(inode->i_sb);
198		if (unlikely(err))
199			return err;
200	} else if (wbc->for_reclaim)
201		nilfs_flush_segment(inode->i_sb, inode->i_ino);
202
203	return 0;
204}
205
206static bool nilfs_dirty_folio(struct address_space *mapping,
207		struct folio *folio)
208{
209	struct inode *inode = mapping->host;
210	struct buffer_head *head;
211	unsigned int nr_dirty = 0;
212	bool ret = filemap_dirty_folio(mapping, folio);
213
214	/*
215	 * The page may not be locked, eg if called from try_to_unmap_one()
216	 */
217	spin_lock(&mapping->private_lock);
218	head = folio_buffers(folio);
219	if (head) {
220		struct buffer_head *bh = head;
221
222		do {
223			/* Do not mark hole blocks dirty */
224			if (buffer_dirty(bh) || !buffer_mapped(bh))
225				continue;
226
227			set_buffer_dirty(bh);
228			nr_dirty++;
229		} while (bh = bh->b_this_page, bh != head);
230	} else if (ret) {
231		nr_dirty = 1 << (folio_shift(folio) - inode->i_blkbits);
232	}
233	spin_unlock(&mapping->private_lock);
234
235	if (nr_dirty)
236		nilfs_set_file_dirty(inode, nr_dirty);
237	return ret;
238}
239
240void nilfs_write_failed(struct address_space *mapping, loff_t to)
241{
242	struct inode *inode = mapping->host;
243
244	if (to > inode->i_size) {
245		truncate_pagecache(inode, inode->i_size);
246		nilfs_truncate(inode);
247	}
248}
249
250static int nilfs_write_begin(struct file *file, struct address_space *mapping,
251			     loff_t pos, unsigned len,
252			     struct page **pagep, void **fsdata)
253
254{
255	struct inode *inode = mapping->host;
256	int err = nilfs_transaction_begin(inode->i_sb, NULL, 1);
257
258	if (unlikely(err))
259		return err;
260
261	err = block_write_begin(mapping, pos, len, pagep, nilfs_get_block);
262	if (unlikely(err)) {
263		nilfs_write_failed(mapping, pos + len);
264		nilfs_transaction_abort(inode->i_sb);
265	}
266	return err;
267}
268
269static int nilfs_write_end(struct file *file, struct address_space *mapping,
270			   loff_t pos, unsigned len, unsigned copied,
271			   struct page *page, void *fsdata)
272{
273	struct inode *inode = mapping->host;
274	unsigned int start = pos & (PAGE_SIZE - 1);
275	unsigned int nr_dirty;
276	int err;
277
278	nr_dirty = nilfs_page_count_clean_buffers(page, start,
279						  start + copied);
280	copied = generic_write_end(file, mapping, pos, len, copied, page,
281				   fsdata);
282	nilfs_set_file_dirty(inode, nr_dirty);
283	err = nilfs_transaction_commit(inode->i_sb);
284	return err ? : copied;
285}
286
287static ssize_t
288nilfs_direct_IO(struct kiocb *iocb, struct iov_iter *iter)
289{
290	struct inode *inode = file_inode(iocb->ki_filp);
291
292	if (iov_iter_rw(iter) == WRITE)
293		return 0;
294
295	/* Needs synchronization with the cleaner */
296	return blockdev_direct_IO(iocb, inode, iter, nilfs_get_block);
297}
298
299const struct address_space_operations nilfs_aops = {
300	.writepage		= nilfs_writepage,
301	.read_folio		= nilfs_read_folio,
302	.writepages		= nilfs_writepages,
303	.dirty_folio		= nilfs_dirty_folio,
304	.readahead		= nilfs_readahead,
305	.write_begin		= nilfs_write_begin,
306	.write_end		= nilfs_write_end,
307	.invalidate_folio	= block_invalidate_folio,
308	.direct_IO		= nilfs_direct_IO,
309	.is_partially_uptodate  = block_is_partially_uptodate,
310};
311
312static int nilfs_insert_inode_locked(struct inode *inode,
313				     struct nilfs_root *root,
314				     unsigned long ino)
315{
316	struct nilfs_iget_args args = {
317		.ino = ino, .root = root, .cno = 0, .for_gc = false,
318		.for_btnc = false, .for_shadow = false
319	};
320
321	return insert_inode_locked4(inode, ino, nilfs_iget_test, &args);
322}
323
324struct inode *nilfs_new_inode(struct inode *dir, umode_t mode)
325{
326	struct super_block *sb = dir->i_sb;
327	struct the_nilfs *nilfs = sb->s_fs_info;
328	struct inode *inode;
329	struct nilfs_inode_info *ii;
330	struct nilfs_root *root;
331	struct buffer_head *bh;
332	int err = -ENOMEM;
333	ino_t ino;
334
335	inode = new_inode(sb);
336	if (unlikely(!inode))
337		goto failed;
338
339	mapping_set_gfp_mask(inode->i_mapping,
340			   mapping_gfp_constraint(inode->i_mapping, ~__GFP_FS));
341
342	root = NILFS_I(dir)->i_root;
343	ii = NILFS_I(inode);
344	ii->i_state = BIT(NILFS_I_NEW);
345	ii->i_root = root;
346
347	err = nilfs_ifile_create_inode(root->ifile, &ino, &bh);
348	if (unlikely(err))
349		goto failed_ifile_create_inode;
350	/* reference count of i_bh inherits from nilfs_mdt_read_block() */
351
352	if (unlikely(ino < NILFS_USER_INO)) {
353		nilfs_warn(sb,
354			   "inode bitmap is inconsistent for reserved inodes");
355		do {
356			brelse(bh);
357			err = nilfs_ifile_create_inode(root->ifile, &ino, &bh);
358			if (unlikely(err))
359				goto failed_ifile_create_inode;
360		} while (ino < NILFS_USER_INO);
361
362		nilfs_info(sb, "repaired inode bitmap for reserved inodes");
363	}
364	ii->i_bh = bh;
365
366	atomic64_inc(&root->inodes_count);
367	inode_init_owner(&nop_mnt_idmap, inode, dir, mode);
368	inode->i_ino = ino;
369	inode->i_mtime = inode->i_atime = inode_set_ctime_current(inode);
370
371	if (S_ISREG(mode) || S_ISDIR(mode) || S_ISLNK(mode)) {
372		err = nilfs_bmap_read(ii->i_bmap, NULL);
373		if (err < 0)
374			goto failed_after_creation;
375
376		set_bit(NILFS_I_BMAP, &ii->i_state);
377		/* No lock is needed; iget() ensures it. */
378	}
379
380	ii->i_flags = nilfs_mask_flags(
381		mode, NILFS_I(dir)->i_flags & NILFS_FL_INHERITED);
382
383	/* ii->i_file_acl = 0; */
384	/* ii->i_dir_acl = 0; */
385	ii->i_dir_start_lookup = 0;
386	nilfs_set_inode_flags(inode);
387	spin_lock(&nilfs->ns_next_gen_lock);
388	inode->i_generation = nilfs->ns_next_generation++;
389	spin_unlock(&nilfs->ns_next_gen_lock);
390	if (nilfs_insert_inode_locked(inode, root, ino) < 0) {
391		err = -EIO;
392		goto failed_after_creation;
393	}
394
395	err = nilfs_init_acl(inode, dir);
396	if (unlikely(err))
397		/*
398		 * Never occur.  When supporting nilfs_init_acl(),
399		 * proper cancellation of above jobs should be considered.
400		 */
401		goto failed_after_creation;
402
403	return inode;
404
405 failed_after_creation:
406	clear_nlink(inode);
407	if (inode->i_state & I_NEW)
408		unlock_new_inode(inode);
409	iput(inode);  /*
410		       * raw_inode will be deleted through
411		       * nilfs_evict_inode().
412		       */
413	goto failed;
414
415 failed_ifile_create_inode:
416	make_bad_inode(inode);
417	iput(inode);
418 failed:
419	return ERR_PTR(err);
420}
421
422void nilfs_set_inode_flags(struct inode *inode)
423{
424	unsigned int flags = NILFS_I(inode)->i_flags;
425	unsigned int new_fl = 0;
426
427	if (flags & FS_SYNC_FL)
428		new_fl |= S_SYNC;
429	if (flags & FS_APPEND_FL)
430		new_fl |= S_APPEND;
431	if (flags & FS_IMMUTABLE_FL)
432		new_fl |= S_IMMUTABLE;
433	if (flags & FS_NOATIME_FL)
434		new_fl |= S_NOATIME;
435	if (flags & FS_DIRSYNC_FL)
436		new_fl |= S_DIRSYNC;
437	inode_set_flags(inode, new_fl, S_SYNC | S_APPEND | S_IMMUTABLE |
438			S_NOATIME | S_DIRSYNC);
439}
440
441int nilfs_read_inode_common(struct inode *inode,
442			    struct nilfs_inode *raw_inode)
443{
444	struct nilfs_inode_info *ii = NILFS_I(inode);
445	int err;
446
447	inode->i_mode = le16_to_cpu(raw_inode->i_mode);
448	i_uid_write(inode, le32_to_cpu(raw_inode->i_uid));
449	i_gid_write(inode, le32_to_cpu(raw_inode->i_gid));
450	set_nlink(inode, le16_to_cpu(raw_inode->i_links_count));
451	inode->i_size = le64_to_cpu(raw_inode->i_size);
452	inode->i_atime.tv_sec = le64_to_cpu(raw_inode->i_mtime);
453	inode_set_ctime(inode, le64_to_cpu(raw_inode->i_ctime),
454			le32_to_cpu(raw_inode->i_ctime_nsec));
455	inode->i_mtime.tv_sec = le64_to_cpu(raw_inode->i_mtime);
456	inode->i_atime.tv_nsec = le32_to_cpu(raw_inode->i_mtime_nsec);
457	inode->i_mtime.tv_nsec = le32_to_cpu(raw_inode->i_mtime_nsec);
458	if (nilfs_is_metadata_file_inode(inode) && !S_ISREG(inode->i_mode))
459		return -EIO; /* this inode is for metadata and corrupted */
460	if (inode->i_nlink == 0)
461		return -ESTALE; /* this inode is deleted */
462
463	inode->i_blocks = le64_to_cpu(raw_inode->i_blocks);
464	ii->i_flags = le32_to_cpu(raw_inode->i_flags);
465#if 0
466	ii->i_file_acl = le32_to_cpu(raw_inode->i_file_acl);
467	ii->i_dir_acl = S_ISREG(inode->i_mode) ?
468		0 : le32_to_cpu(raw_inode->i_dir_acl);
469#endif
470	ii->i_dir_start_lookup = 0;
471	inode->i_generation = le32_to_cpu(raw_inode->i_generation);
472
473	if (S_ISREG(inode->i_mode) || S_ISDIR(inode->i_mode) ||
474	    S_ISLNK(inode->i_mode)) {
475		err = nilfs_bmap_read(ii->i_bmap, raw_inode);
476		if (err < 0)
477			return err;
478		set_bit(NILFS_I_BMAP, &ii->i_state);
479		/* No lock is needed; iget() ensures it. */
480	}
481	return 0;
482}
483
484static int __nilfs_read_inode(struct super_block *sb,
485			      struct nilfs_root *root, unsigned long ino,
486			      struct inode *inode)
487{
488	struct the_nilfs *nilfs = sb->s_fs_info;
489	struct buffer_head *bh;
490	struct nilfs_inode *raw_inode;
491	int err;
492
493	down_read(&NILFS_MDT(nilfs->ns_dat)->mi_sem);
494	err = nilfs_ifile_get_inode_block(root->ifile, ino, &bh);
495	if (unlikely(err))
496		goto bad_inode;
497
498	raw_inode = nilfs_ifile_map_inode(root->ifile, ino, bh);
499
500	err = nilfs_read_inode_common(inode, raw_inode);
501	if (err)
502		goto failed_unmap;
503
504	if (S_ISREG(inode->i_mode)) {
505		inode->i_op = &nilfs_file_inode_operations;
506		inode->i_fop = &nilfs_file_operations;
507		inode->i_mapping->a_ops = &nilfs_aops;
508	} else if (S_ISDIR(inode->i_mode)) {
509		inode->i_op = &nilfs_dir_inode_operations;
510		inode->i_fop = &nilfs_dir_operations;
511		inode->i_mapping->a_ops = &nilfs_aops;
512	} else if (S_ISLNK(inode->i_mode)) {
513		inode->i_op = &nilfs_symlink_inode_operations;
514		inode_nohighmem(inode);
515		inode->i_mapping->a_ops = &nilfs_aops;
516	} else {
517		inode->i_op = &nilfs_special_inode_operations;
518		init_special_inode(
519			inode, inode->i_mode,
520			huge_decode_dev(le64_to_cpu(raw_inode->i_device_code)));
521	}
522	nilfs_ifile_unmap_inode(root->ifile, ino, bh);
523	brelse(bh);
524	up_read(&NILFS_MDT(nilfs->ns_dat)->mi_sem);
525	nilfs_set_inode_flags(inode);
526	mapping_set_gfp_mask(inode->i_mapping,
527			   mapping_gfp_constraint(inode->i_mapping, ~__GFP_FS));
528	return 0;
529
530 failed_unmap:
531	nilfs_ifile_unmap_inode(root->ifile, ino, bh);
532	brelse(bh);
533
534 bad_inode:
535	up_read(&NILFS_MDT(nilfs->ns_dat)->mi_sem);
536	return err;
537}
538
539static int nilfs_iget_test(struct inode *inode, void *opaque)
540{
541	struct nilfs_iget_args *args = opaque;
542	struct nilfs_inode_info *ii;
543
544	if (args->ino != inode->i_ino || args->root != NILFS_I(inode)->i_root)
545		return 0;
546
547	ii = NILFS_I(inode);
548	if (test_bit(NILFS_I_BTNC, &ii->i_state)) {
549		if (!args->for_btnc)
550			return 0;
551	} else if (args->for_btnc) {
552		return 0;
553	}
554	if (test_bit(NILFS_I_SHADOW, &ii->i_state)) {
555		if (!args->for_shadow)
556			return 0;
557	} else if (args->for_shadow) {
558		return 0;
559	}
560
561	if (!test_bit(NILFS_I_GCINODE, &ii->i_state))
562		return !args->for_gc;
563
564	return args->for_gc && args->cno == ii->i_cno;
565}
566
567static int nilfs_iget_set(struct inode *inode, void *opaque)
568{
569	struct nilfs_iget_args *args = opaque;
570
571	inode->i_ino = args->ino;
572	NILFS_I(inode)->i_cno = args->cno;
573	NILFS_I(inode)->i_root = args->root;
574	if (args->root && args->ino == NILFS_ROOT_INO)
575		nilfs_get_root(args->root);
576
577	if (args->for_gc)
578		NILFS_I(inode)->i_state = BIT(NILFS_I_GCINODE);
579	if (args->for_btnc)
580		NILFS_I(inode)->i_state |= BIT(NILFS_I_BTNC);
581	if (args->for_shadow)
582		NILFS_I(inode)->i_state |= BIT(NILFS_I_SHADOW);
583	return 0;
584}
585
586struct inode *nilfs_ilookup(struct super_block *sb, struct nilfs_root *root,
587			    unsigned long ino)
588{
589	struct nilfs_iget_args args = {
590		.ino = ino, .root = root, .cno = 0, .for_gc = false,
591		.for_btnc = false, .for_shadow = false
592	};
593
594	return ilookup5(sb, ino, nilfs_iget_test, &args);
595}
596
597struct inode *nilfs_iget_locked(struct super_block *sb, struct nilfs_root *root,
598				unsigned long ino)
599{
600	struct nilfs_iget_args args = {
601		.ino = ino, .root = root, .cno = 0, .for_gc = false,
602		.for_btnc = false, .for_shadow = false
603	};
604
605	return iget5_locked(sb, ino, nilfs_iget_test, nilfs_iget_set, &args);
606}
607
608struct inode *nilfs_iget(struct super_block *sb, struct nilfs_root *root,
609			 unsigned long ino)
610{
611	struct inode *inode;
612	int err;
613
614	inode = nilfs_iget_locked(sb, root, ino);
615	if (unlikely(!inode))
616		return ERR_PTR(-ENOMEM);
617	if (!(inode->i_state & I_NEW))
618		return inode;
619
620	err = __nilfs_read_inode(sb, root, ino, inode);
621	if (unlikely(err)) {
622		iget_failed(inode);
623		return ERR_PTR(err);
624	}
625	unlock_new_inode(inode);
626	return inode;
627}
628
629struct inode *nilfs_iget_for_gc(struct super_block *sb, unsigned long ino,
630				__u64 cno)
631{
632	struct nilfs_iget_args args = {
633		.ino = ino, .root = NULL, .cno = cno, .for_gc = true,
634		.for_btnc = false, .for_shadow = false
635	};
636	struct inode *inode;
637	int err;
638
639	inode = iget5_locked(sb, ino, nilfs_iget_test, nilfs_iget_set, &args);
640	if (unlikely(!inode))
641		return ERR_PTR(-ENOMEM);
642	if (!(inode->i_state & I_NEW))
643		return inode;
644
645	err = nilfs_init_gcinode(inode);
646	if (unlikely(err)) {
647		iget_failed(inode);
648		return ERR_PTR(err);
649	}
650	unlock_new_inode(inode);
651	return inode;
652}
653
654/**
655 * nilfs_attach_btree_node_cache - attach a B-tree node cache to the inode
656 * @inode: inode object
657 *
658 * nilfs_attach_btree_node_cache() attaches a B-tree node cache to @inode,
659 * or does nothing if the inode already has it.  This function allocates
660 * an additional inode to maintain page cache of B-tree nodes one-on-one.
661 *
662 * Return Value: On success, 0 is returned. On errors, one of the following
663 * negative error code is returned.
664 *
665 * %-ENOMEM - Insufficient memory available.
666 */
667int nilfs_attach_btree_node_cache(struct inode *inode)
668{
669	struct nilfs_inode_info *ii = NILFS_I(inode);
670	struct inode *btnc_inode;
671	struct nilfs_iget_args args;
672
673	if (ii->i_assoc_inode)
674		return 0;
675
676	args.ino = inode->i_ino;
677	args.root = ii->i_root;
678	args.cno = ii->i_cno;
679	args.for_gc = test_bit(NILFS_I_GCINODE, &ii->i_state) != 0;
680	args.for_btnc = true;
681	args.for_shadow = test_bit(NILFS_I_SHADOW, &ii->i_state) != 0;
682
683	btnc_inode = iget5_locked(inode->i_sb, inode->i_ino, nilfs_iget_test,
684				  nilfs_iget_set, &args);
685	if (unlikely(!btnc_inode))
686		return -ENOMEM;
687	if (btnc_inode->i_state & I_NEW) {
688		nilfs_init_btnc_inode(btnc_inode);
689		unlock_new_inode(btnc_inode);
690	}
691	NILFS_I(btnc_inode)->i_assoc_inode = inode;
692	NILFS_I(btnc_inode)->i_bmap = ii->i_bmap;
693	ii->i_assoc_inode = btnc_inode;
694
695	return 0;
696}
697
698/**
699 * nilfs_detach_btree_node_cache - detach the B-tree node cache from the inode
700 * @inode: inode object
701 *
702 * nilfs_detach_btree_node_cache() detaches the B-tree node cache and its
703 * holder inode bound to @inode, or does nothing if @inode doesn't have it.
704 */
705void nilfs_detach_btree_node_cache(struct inode *inode)
706{
707	struct nilfs_inode_info *ii = NILFS_I(inode);
708	struct inode *btnc_inode = ii->i_assoc_inode;
709
710	if (btnc_inode) {
711		NILFS_I(btnc_inode)->i_assoc_inode = NULL;
712		ii->i_assoc_inode = NULL;
713		iput(btnc_inode);
714	}
715}
716
717/**
718 * nilfs_iget_for_shadow - obtain inode for shadow mapping
719 * @inode: inode object that uses shadow mapping
720 *
721 * nilfs_iget_for_shadow() allocates a pair of inodes that holds page
722 * caches for shadow mapping.  The page cache for data pages is set up
723 * in one inode and the one for b-tree node pages is set up in the
724 * other inode, which is attached to the former inode.
725 *
726 * Return Value: On success, a pointer to the inode for data pages is
727 * returned. On errors, one of the following negative error code is returned
728 * in a pointer type.
729 *
730 * %-ENOMEM - Insufficient memory available.
731 */
732struct inode *nilfs_iget_for_shadow(struct inode *inode)
733{
734	struct nilfs_iget_args args = {
735		.ino = inode->i_ino, .root = NULL, .cno = 0, .for_gc = false,
736		.for_btnc = false, .for_shadow = true
737	};
738	struct inode *s_inode;
739	int err;
740
741	s_inode = iget5_locked(inode->i_sb, inode->i_ino, nilfs_iget_test,
742			       nilfs_iget_set, &args);
743	if (unlikely(!s_inode))
744		return ERR_PTR(-ENOMEM);
745	if (!(s_inode->i_state & I_NEW))
746		return inode;
747
748	NILFS_I(s_inode)->i_flags = 0;
749	memset(NILFS_I(s_inode)->i_bmap, 0, sizeof(struct nilfs_bmap));
750	mapping_set_gfp_mask(s_inode->i_mapping, GFP_NOFS);
751
752	err = nilfs_attach_btree_node_cache(s_inode);
753	if (unlikely(err)) {
754		iget_failed(s_inode);
755		return ERR_PTR(err);
756	}
757	unlock_new_inode(s_inode);
758	return s_inode;
759}
760
761void nilfs_write_inode_common(struct inode *inode,
762			      struct nilfs_inode *raw_inode, int has_bmap)
763{
764	struct nilfs_inode_info *ii = NILFS_I(inode);
765
766	raw_inode->i_mode = cpu_to_le16(inode->i_mode);
767	raw_inode->i_uid = cpu_to_le32(i_uid_read(inode));
768	raw_inode->i_gid = cpu_to_le32(i_gid_read(inode));
769	raw_inode->i_links_count = cpu_to_le16(inode->i_nlink);
770	raw_inode->i_size = cpu_to_le64(inode->i_size);
771	raw_inode->i_ctime = cpu_to_le64(inode_get_ctime(inode).tv_sec);
772	raw_inode->i_mtime = cpu_to_le64(inode->i_mtime.tv_sec);
773	raw_inode->i_ctime_nsec = cpu_to_le32(inode_get_ctime(inode).tv_nsec);
774	raw_inode->i_mtime_nsec = cpu_to_le32(inode->i_mtime.tv_nsec);
775	raw_inode->i_blocks = cpu_to_le64(inode->i_blocks);
776
777	raw_inode->i_flags = cpu_to_le32(ii->i_flags);
778	raw_inode->i_generation = cpu_to_le32(inode->i_generation);
779
780	if (NILFS_ROOT_METADATA_FILE(inode->i_ino)) {
781		struct the_nilfs *nilfs = inode->i_sb->s_fs_info;
782
783		/* zero-fill unused portion in the case of super root block */
784		raw_inode->i_xattr = 0;
785		raw_inode->i_pad = 0;
786		memset((void *)raw_inode + sizeof(*raw_inode), 0,
787		       nilfs->ns_inode_size - sizeof(*raw_inode));
788	}
789
790	if (has_bmap)
791		nilfs_bmap_write(ii->i_bmap, raw_inode);
792	else if (S_ISCHR(inode->i_mode) || S_ISBLK(inode->i_mode))
793		raw_inode->i_device_code =
794			cpu_to_le64(huge_encode_dev(inode->i_rdev));
795	/*
796	 * When extending inode, nilfs->ns_inode_size should be checked
797	 * for substitutions of appended fields.
798	 */
799}
800
801void nilfs_update_inode(struct inode *inode, struct buffer_head *ibh, int flags)
802{
803	ino_t ino = inode->i_ino;
804	struct nilfs_inode_info *ii = NILFS_I(inode);
805	struct inode *ifile = ii->i_root->ifile;
806	struct nilfs_inode *raw_inode;
807
808	raw_inode = nilfs_ifile_map_inode(ifile, ino, ibh);
809
810	if (test_and_clear_bit(NILFS_I_NEW, &ii->i_state))
811		memset(raw_inode, 0, NILFS_MDT(ifile)->mi_entry_size);
812	if (flags & I_DIRTY_DATASYNC)
813		set_bit(NILFS_I_INODE_SYNC, &ii->i_state);
814
815	nilfs_write_inode_common(inode, raw_inode, 0);
816		/*
817		 * XXX: call with has_bmap = 0 is a workaround to avoid
818		 * deadlock of bmap.  This delays update of i_bmap to just
819		 * before writing.
820		 */
821
822	nilfs_ifile_unmap_inode(ifile, ino, ibh);
823}
824
825#define NILFS_MAX_TRUNCATE_BLOCKS	16384  /* 64MB for 4KB block */
826
827static void nilfs_truncate_bmap(struct nilfs_inode_info *ii,
828				unsigned long from)
829{
830	__u64 b;
831	int ret;
832
833	if (!test_bit(NILFS_I_BMAP, &ii->i_state))
834		return;
835repeat:
836	ret = nilfs_bmap_last_key(ii->i_bmap, &b);
837	if (ret == -ENOENT)
838		return;
839	else if (ret < 0)
840		goto failed;
841
842	if (b < from)
843		return;
844
845	b -= min_t(__u64, NILFS_MAX_TRUNCATE_BLOCKS, b - from);
846	ret = nilfs_bmap_truncate(ii->i_bmap, b);
847	nilfs_relax_pressure_in_lock(ii->vfs_inode.i_sb);
848	if (!ret || (ret == -ENOMEM &&
849		     nilfs_bmap_truncate(ii->i_bmap, b) == 0))
850		goto repeat;
851
852failed:
853	nilfs_warn(ii->vfs_inode.i_sb, "error %d truncating bmap (ino=%lu)",
854		   ret, ii->vfs_inode.i_ino);
855}
856
857void nilfs_truncate(struct inode *inode)
858{
859	unsigned long blkoff;
860	unsigned int blocksize;
861	struct nilfs_transaction_info ti;
862	struct super_block *sb = inode->i_sb;
863	struct nilfs_inode_info *ii = NILFS_I(inode);
864
865	if (!test_bit(NILFS_I_BMAP, &ii->i_state))
866		return;
867	if (IS_APPEND(inode) || IS_IMMUTABLE(inode))
868		return;
869
870	blocksize = sb->s_blocksize;
871	blkoff = (inode->i_size + blocksize - 1) >> sb->s_blocksize_bits;
872	nilfs_transaction_begin(sb, &ti, 0); /* never fails */
873
874	block_truncate_page(inode->i_mapping, inode->i_size, nilfs_get_block);
875
876	nilfs_truncate_bmap(ii, blkoff);
877
878	inode->i_mtime = inode_set_ctime_current(inode);
879	if (IS_SYNC(inode))
880		nilfs_set_transaction_flag(NILFS_TI_SYNC);
881
882	nilfs_mark_inode_dirty(inode);
883	nilfs_set_file_dirty(inode, 0);
884	nilfs_transaction_commit(sb);
885	/*
886	 * May construct a logical segment and may fail in sync mode.
887	 * But truncate has no return value.
888	 */
889}
890
891static void nilfs_clear_inode(struct inode *inode)
892{
893	struct nilfs_inode_info *ii = NILFS_I(inode);
894
895	/*
896	 * Free resources allocated in nilfs_read_inode(), here.
897	 */
898	BUG_ON(!list_empty(&ii->i_dirty));
899	brelse(ii->i_bh);
900	ii->i_bh = NULL;
901
902	if (nilfs_is_metadata_file_inode(inode))
903		nilfs_mdt_clear(inode);
904
905	if (test_bit(NILFS_I_BMAP, &ii->i_state))
906		nilfs_bmap_clear(ii->i_bmap);
907
908	if (!test_bit(NILFS_I_BTNC, &ii->i_state))
909		nilfs_detach_btree_node_cache(inode);
910
911	if (ii->i_root && inode->i_ino == NILFS_ROOT_INO)
912		nilfs_put_root(ii->i_root);
913}
914
915void nilfs_evict_inode(struct inode *inode)
916{
917	struct nilfs_transaction_info ti;
918	struct super_block *sb = inode->i_sb;
919	struct nilfs_inode_info *ii = NILFS_I(inode);
920	struct the_nilfs *nilfs;
921	int ret;
922
923	if (inode->i_nlink || !ii->i_root || unlikely(is_bad_inode(inode))) {
924		truncate_inode_pages_final(&inode->i_data);
925		clear_inode(inode);
926		nilfs_clear_inode(inode);
927		return;
928	}
929	nilfs_transaction_begin(sb, &ti, 0); /* never fails */
930
931	truncate_inode_pages_final(&inode->i_data);
932
933	nilfs = sb->s_fs_info;
934	if (unlikely(sb_rdonly(sb) || !nilfs->ns_writer)) {
935		/*
936		 * If this inode is about to be disposed after the file system
937		 * has been degraded to read-only due to file system corruption
938		 * or after the writer has been detached, do not make any
939		 * changes that cause writes, just clear it.
940		 * Do this check after read-locking ns_segctor_sem by
941		 * nilfs_transaction_begin() in order to avoid a race with
942		 * the writer detach operation.
943		 */
944		clear_inode(inode);
945		nilfs_clear_inode(inode);
946		nilfs_transaction_abort(sb);
947		return;
948	}
949
950	/* TODO: some of the following operations may fail.  */
951	nilfs_truncate_bmap(ii, 0);
952	nilfs_mark_inode_dirty(inode);
953	clear_inode(inode);
954
955	ret = nilfs_ifile_delete_inode(ii->i_root->ifile, inode->i_ino);
956	if (!ret)
957		atomic64_dec(&ii->i_root->inodes_count);
958
959	nilfs_clear_inode(inode);
960
961	if (IS_SYNC(inode))
962		nilfs_set_transaction_flag(NILFS_TI_SYNC);
963	nilfs_transaction_commit(sb);
964	/*
965	 * May construct a logical segment and may fail in sync mode.
966	 * But delete_inode has no return value.
967	 */
968}
969
970int nilfs_setattr(struct mnt_idmap *idmap, struct dentry *dentry,
971		  struct iattr *iattr)
972{
973	struct nilfs_transaction_info ti;
974	struct inode *inode = d_inode(dentry);
975	struct super_block *sb = inode->i_sb;
976	int err;
977
978	err = setattr_prepare(&nop_mnt_idmap, dentry, iattr);
979	if (err)
980		return err;
981
982	err = nilfs_transaction_begin(sb, &ti, 0);
983	if (unlikely(err))
984		return err;
985
986	if ((iattr->ia_valid & ATTR_SIZE) &&
987	    iattr->ia_size != i_size_read(inode)) {
988		inode_dio_wait(inode);
989		truncate_setsize(inode, iattr->ia_size);
990		nilfs_truncate(inode);
991	}
992
993	setattr_copy(&nop_mnt_idmap, inode, iattr);
994	mark_inode_dirty(inode);
995
996	if (iattr->ia_valid & ATTR_MODE) {
997		err = nilfs_acl_chmod(inode);
998		if (unlikely(err))
999			goto out_err;
1000	}
1001
1002	return nilfs_transaction_commit(sb);
1003
1004out_err:
1005	nilfs_transaction_abort(sb);
1006	return err;
1007}
1008
1009int nilfs_permission(struct mnt_idmap *idmap, struct inode *inode,
1010		     int mask)
1011{
1012	struct nilfs_root *root = NILFS_I(inode)->i_root;
1013
1014	if ((mask & MAY_WRITE) && root &&
1015	    root->cno != NILFS_CPTREE_CURRENT_CNO)
1016		return -EROFS; /* snapshot is not writable */
1017
1018	return generic_permission(&nop_mnt_idmap, inode, mask);
1019}
1020
1021int nilfs_load_inode_block(struct inode *inode, struct buffer_head **pbh)
1022{
1023	struct the_nilfs *nilfs = inode->i_sb->s_fs_info;
1024	struct nilfs_inode_info *ii = NILFS_I(inode);
1025	int err;
1026
1027	spin_lock(&nilfs->ns_inode_lock);
1028	if (ii->i_bh == NULL || unlikely(!buffer_uptodate(ii->i_bh))) {
1029		spin_unlock(&nilfs->ns_inode_lock);
1030		err = nilfs_ifile_get_inode_block(ii->i_root->ifile,
1031						  inode->i_ino, pbh);
1032		if (unlikely(err))
1033			return err;
1034		spin_lock(&nilfs->ns_inode_lock);
1035		if (ii->i_bh == NULL)
1036			ii->i_bh = *pbh;
1037		else if (unlikely(!buffer_uptodate(ii->i_bh))) {
1038			__brelse(ii->i_bh);
1039			ii->i_bh = *pbh;
1040		} else {
1041			brelse(*pbh);
1042			*pbh = ii->i_bh;
1043		}
1044	} else
1045		*pbh = ii->i_bh;
1046
1047	get_bh(*pbh);
1048	spin_unlock(&nilfs->ns_inode_lock);
1049	return 0;
1050}
1051
1052int nilfs_inode_dirty(struct inode *inode)
1053{
1054	struct nilfs_inode_info *ii = NILFS_I(inode);
1055	struct the_nilfs *nilfs = inode->i_sb->s_fs_info;
1056	int ret = 0;
1057
1058	if (!list_empty(&ii->i_dirty)) {
1059		spin_lock(&nilfs->ns_inode_lock);
1060		ret = test_bit(NILFS_I_DIRTY, &ii->i_state) ||
1061			test_bit(NILFS_I_BUSY, &ii->i_state);
1062		spin_unlock(&nilfs->ns_inode_lock);
1063	}
1064	return ret;
1065}
1066
1067int nilfs_set_file_dirty(struct inode *inode, unsigned int nr_dirty)
1068{
1069	struct nilfs_inode_info *ii = NILFS_I(inode);
1070	struct the_nilfs *nilfs = inode->i_sb->s_fs_info;
1071
1072	atomic_add(nr_dirty, &nilfs->ns_ndirtyblks);
1073
1074	if (test_and_set_bit(NILFS_I_DIRTY, &ii->i_state))
1075		return 0;
1076
1077	spin_lock(&nilfs->ns_inode_lock);
1078	if (!test_bit(NILFS_I_QUEUED, &ii->i_state) &&
1079	    !test_bit(NILFS_I_BUSY, &ii->i_state)) {
1080		/*
1081		 * Because this routine may race with nilfs_dispose_list(),
1082		 * we have to check NILFS_I_QUEUED here, too.
1083		 */
1084		if (list_empty(&ii->i_dirty) && igrab(inode) == NULL) {
1085			/*
1086			 * This will happen when somebody is freeing
1087			 * this inode.
1088			 */
1089			nilfs_warn(inode->i_sb,
1090				   "cannot set file dirty (ino=%lu): the file is being freed",
1091				   inode->i_ino);
1092			spin_unlock(&nilfs->ns_inode_lock);
1093			return -EINVAL; /*
1094					 * NILFS_I_DIRTY may remain for
1095					 * freeing inode.
1096					 */
1097		}
1098		list_move_tail(&ii->i_dirty, &nilfs->ns_dirty_files);
1099		set_bit(NILFS_I_QUEUED, &ii->i_state);
1100	}
1101	spin_unlock(&nilfs->ns_inode_lock);
1102	return 0;
1103}
1104
1105int __nilfs_mark_inode_dirty(struct inode *inode, int flags)
1106{
1107	struct the_nilfs *nilfs = inode->i_sb->s_fs_info;
1108	struct buffer_head *ibh;
1109	int err;
1110
1111	/*
1112	 * Do not dirty inodes after the log writer has been detached
1113	 * and its nilfs_root struct has been freed.
1114	 */
1115	if (unlikely(nilfs_purging(nilfs)))
1116		return 0;
1117
1118	err = nilfs_load_inode_block(inode, &ibh);
1119	if (unlikely(err)) {
1120		nilfs_warn(inode->i_sb,
1121			   "cannot mark inode dirty (ino=%lu): error %d loading inode block",
1122			   inode->i_ino, err);
1123		return err;
1124	}
1125	nilfs_update_inode(inode, ibh, flags);
1126	mark_buffer_dirty(ibh);
1127	nilfs_mdt_mark_dirty(NILFS_I(inode)->i_root->ifile);
1128	brelse(ibh);
1129	return 0;
1130}
1131
1132/**
1133 * nilfs_dirty_inode - reflect changes on given inode to an inode block.
1134 * @inode: inode of the file to be registered.
1135 * @flags: flags to determine the dirty state of the inode
1136 *
1137 * nilfs_dirty_inode() loads a inode block containing the specified
1138 * @inode and copies data from a nilfs_inode to a corresponding inode
1139 * entry in the inode block. This operation is excluded from the segment
1140 * construction. This function can be called both as a single operation
1141 * and as a part of indivisible file operations.
1142 */
1143void nilfs_dirty_inode(struct inode *inode, int flags)
1144{
1145	struct nilfs_transaction_info ti;
1146	struct nilfs_mdt_info *mdi = NILFS_MDT(inode);
1147
1148	if (is_bad_inode(inode)) {
1149		nilfs_warn(inode->i_sb,
1150			   "tried to mark bad_inode dirty. ignored.");
1151		dump_stack();
1152		return;
1153	}
1154	if (mdi) {
1155		nilfs_mdt_mark_dirty(inode);
1156		return;
1157	}
1158	nilfs_transaction_begin(inode->i_sb, &ti, 0);
1159	__nilfs_mark_inode_dirty(inode, flags);
1160	nilfs_transaction_commit(inode->i_sb); /* never fails */
1161}
1162
1163int nilfs_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo,
1164		 __u64 start, __u64 len)
1165{
1166	struct the_nilfs *nilfs = inode->i_sb->s_fs_info;
1167	__u64 logical = 0, phys = 0, size = 0;
1168	__u32 flags = 0;
1169	loff_t isize;
1170	sector_t blkoff, end_blkoff;
1171	sector_t delalloc_blkoff;
1172	unsigned long delalloc_blklen;
1173	unsigned int blkbits = inode->i_blkbits;
1174	int ret, n;
1175
1176	ret = fiemap_prep(inode, fieinfo, start, &len, 0);
1177	if (ret)
1178		return ret;
1179
1180	inode_lock(inode);
1181
1182	isize = i_size_read(inode);
1183
1184	blkoff = start >> blkbits;
1185	end_blkoff = (start + len - 1) >> blkbits;
1186
1187	delalloc_blklen = nilfs_find_uncommitted_extent(inode, blkoff,
1188							&delalloc_blkoff);
1189
1190	do {
1191		__u64 blkphy;
1192		unsigned int maxblocks;
1193
1194		if (delalloc_blklen && blkoff == delalloc_blkoff) {
1195			if (size) {
1196				/* End of the current extent */
1197				ret = fiemap_fill_next_extent(
1198					fieinfo, logical, phys, size, flags);
1199				if (ret)
1200					break;
1201			}
1202			if (blkoff > end_blkoff)
1203				break;
1204
1205			flags = FIEMAP_EXTENT_MERGED | FIEMAP_EXTENT_DELALLOC;
1206			logical = blkoff << blkbits;
1207			phys = 0;
1208			size = delalloc_blklen << blkbits;
1209
1210			blkoff = delalloc_blkoff + delalloc_blklen;
1211			delalloc_blklen = nilfs_find_uncommitted_extent(
1212				inode, blkoff, &delalloc_blkoff);
1213			continue;
1214		}
1215
1216		/*
1217		 * Limit the number of blocks that we look up so as
1218		 * not to get into the next delayed allocation extent.
1219		 */
1220		maxblocks = INT_MAX;
1221		if (delalloc_blklen)
1222			maxblocks = min_t(sector_t, delalloc_blkoff - blkoff,
1223					  maxblocks);
1224		blkphy = 0;
1225
1226		down_read(&NILFS_MDT(nilfs->ns_dat)->mi_sem);
1227		n = nilfs_bmap_lookup_contig(
1228			NILFS_I(inode)->i_bmap, blkoff, &blkphy, maxblocks);
1229		up_read(&NILFS_MDT(nilfs->ns_dat)->mi_sem);
1230
1231		if (n < 0) {
1232			int past_eof;
1233
1234			if (unlikely(n != -ENOENT))
1235				break; /* error */
1236
1237			/* HOLE */
1238			blkoff++;
1239			past_eof = ((blkoff << blkbits) >= isize);
1240
1241			if (size) {
1242				/* End of the current extent */
1243
1244				if (past_eof)
1245					flags |= FIEMAP_EXTENT_LAST;
1246
1247				ret = fiemap_fill_next_extent(
1248					fieinfo, logical, phys, size, flags);
1249				if (ret)
1250					break;
1251				size = 0;
1252			}
1253			if (blkoff > end_blkoff || past_eof)
1254				break;
1255		} else {
1256			if (size) {
1257				if (phys && blkphy << blkbits == phys + size) {
1258					/* The current extent goes on */
1259					size += n << blkbits;
1260				} else {
1261					/* Terminate the current extent */
1262					ret = fiemap_fill_next_extent(
1263						fieinfo, logical, phys, size,
1264						flags);
1265					if (ret || blkoff > end_blkoff)
1266						break;
1267
1268					/* Start another extent */
1269					flags = FIEMAP_EXTENT_MERGED;
1270					logical = blkoff << blkbits;
1271					phys = blkphy << blkbits;
1272					size = n << blkbits;
1273				}
1274			} else {
1275				/* Start a new extent */
1276				flags = FIEMAP_EXTENT_MERGED;
1277				logical = blkoff << blkbits;
1278				phys = blkphy << blkbits;
1279				size = n << blkbits;
1280			}
1281			blkoff += n;
1282		}
1283		cond_resched();
1284	} while (true);
1285
1286	/* If ret is 1 then we just hit the end of the extent array */
1287	if (ret == 1)
1288		ret = 0;
1289
1290	inode_unlock(inode);
1291	return ret;
1292}
1293