xref: /kernel/linux/linux-5.10/fs/nilfs2/segment.c (revision 8c2ecf20)
1// SPDX-License-Identifier: GPL-2.0+
2/*
3 * segment.c - NILFS segment constructor.
4 *
5 * Copyright (C) 2005-2008 Nippon Telegraph and Telephone Corporation.
6 *
7 * Written by Ryusuke Konishi.
8 *
9 */
10
11#include <linux/pagemap.h>
12#include <linux/buffer_head.h>
13#include <linux/writeback.h>
14#include <linux/bitops.h>
15#include <linux/bio.h>
16#include <linux/completion.h>
17#include <linux/blkdev.h>
18#include <linux/backing-dev.h>
19#include <linux/freezer.h>
20#include <linux/kthread.h>
21#include <linux/crc32.h>
22#include <linux/pagevec.h>
23#include <linux/slab.h>
24#include <linux/sched/signal.h>
25
26#include "nilfs.h"
27#include "btnode.h"
28#include "page.h"
29#include "segment.h"
30#include "sufile.h"
31#include "cpfile.h"
32#include "ifile.h"
33#include "segbuf.h"
34
35
36/*
37 * Segment constructor
38 */
39#define SC_N_INODEVEC	16   /* Size of locally allocated inode vector */
40
41#define SC_MAX_SEGDELTA 64   /*
42			      * Upper limit of the number of segments
43			      * appended in collection retry loop
44			      */
45
46/* Construction mode */
47enum {
48	SC_LSEG_SR = 1,	/* Make a logical segment having a super root */
49	SC_LSEG_DSYNC,	/*
50			 * Flush data blocks of a given file and make
51			 * a logical segment without a super root.
52			 */
53	SC_FLUSH_FILE,	/*
54			 * Flush data files; this leads to segment writes without
55			 * creating a checkpoint.
56			 */
57	SC_FLUSH_DAT,	/*
58			 * Flush DAT file.  This also creates segments
59			 * without a checkpoint.
60			 */
61};
62
63/* Stage numbers of dirty block collection */
64enum {
65	NILFS_ST_INIT = 0,
66	NILFS_ST_GC,		/* Collecting dirty blocks for GC */
67	NILFS_ST_FILE,
68	NILFS_ST_IFILE,
69	NILFS_ST_CPFILE,
70	NILFS_ST_SUFILE,
71	NILFS_ST_DAT,
72	NILFS_ST_SR,		/* Super root */
73	NILFS_ST_DSYNC,		/* Data sync blocks */
74	NILFS_ST_DONE,
75};
76
77#define CREATE_TRACE_POINTS
78#include <trace/events/nilfs2.h>
79
80/*
81 * nilfs_sc_cstage_inc(), nilfs_sc_cstage_set(), nilfs_sc_cstage_get() are
82 * wrapper functions for the stage count (nilfs_sc_info->sc_stage.scnt).  Users of
83 * the variable must use them because every transition of the stage count must
84 * emit a trace event (trace_nilfs2_collection_stage_transition).
85 *
86 * nilfs_sc_cstage_get() isn't required for the above purpose because it doesn't
87 * produce a tracepoint event.  It is provided just to make the intention
88 * clear.
89 */
90static inline void nilfs_sc_cstage_inc(struct nilfs_sc_info *sci)
91{
92	sci->sc_stage.scnt++;
93	trace_nilfs2_collection_stage_transition(sci);
94}
95
96static inline void nilfs_sc_cstage_set(struct nilfs_sc_info *sci, int next_scnt)
97{
98	sci->sc_stage.scnt = next_scnt;
99	trace_nilfs2_collection_stage_transition(sci);
100}
101
102static inline int nilfs_sc_cstage_get(struct nilfs_sc_info *sci)
103{
104	return sci->sc_stage.scnt;
105}
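/*
 * Example (illustrative, using only the helpers above): a stage transition
 * is written as
 *
 *	nilfs_sc_cstage_set(sci, NILFS_ST_INIT);
 *	...
 *	nilfs_sc_cstage_inc(sci);
 *
 * so that trace_nilfs2_collection_stage_transition() fires on every change,
 * whereas assigning sci->sc_stage.scnt directly would skip the tracepoint.
 */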
106
107/* State flags of collection */
108#define NILFS_CF_NODE		0x0001	/* Collecting node blocks */
109#define NILFS_CF_IFILE_STARTED	0x0002	/* IFILE stage has started */
110#define NILFS_CF_SUFREED	0x0004	/* segment usages have been freed */
111#define NILFS_CF_HISTORY_MASK	(NILFS_CF_IFILE_STARTED | NILFS_CF_SUFREED)
112
113/* Operations depending on the construction mode and file type */
114struct nilfs_sc_operations {
115	int (*collect_data)(struct nilfs_sc_info *, struct buffer_head *,
116			    struct inode *);
117	int (*collect_node)(struct nilfs_sc_info *, struct buffer_head *,
118			    struct inode *);
119	int (*collect_bmap)(struct nilfs_sc_info *, struct buffer_head *,
120			    struct inode *);
121	void (*write_data_binfo)(struct nilfs_sc_info *,
122				 struct nilfs_segsum_pointer *,
123				 union nilfs_binfo *);
124	void (*write_node_binfo)(struct nilfs_sc_info *,
125				 struct nilfs_segsum_pointer *,
126				 union nilfs_binfo *);
127};
128
129/*
130 * Other definitions
131 */
132static void nilfs_segctor_start_timer(struct nilfs_sc_info *);
133static void nilfs_segctor_do_flush(struct nilfs_sc_info *, int);
134static void nilfs_segctor_do_immediate_flush(struct nilfs_sc_info *);
135static void nilfs_dispose_list(struct the_nilfs *, struct list_head *, int);
136
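/*
 * Wraparound-safe comparisons of 32-bit counters, in the spirit of the
 * kernel's time_after()/time_before() helpers: casting to __s32 before
 * subtracting keeps the result correct across a counter overflow.  For
 * example, nilfs_cnt32_gt(1, 0xfffffffe) is true because the value 1 is
 * "newer" than 0xfffffffe once the counter has wrapped.
 */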
137#define nilfs_cnt32_gt(a, b)   \
138	(typecheck(__u32, a) && typecheck(__u32, b) && \
139	 ((__s32)(b) - (__s32)(a) < 0))
140#define nilfs_cnt32_ge(a, b)   \
141	(typecheck(__u32, a) && typecheck(__u32, b) && \
142	 ((__s32)(a) - (__s32)(b) >= 0))
143#define nilfs_cnt32_lt(a, b)  nilfs_cnt32_gt(b, a)
144#define nilfs_cnt32_le(a, b)  nilfs_cnt32_ge(b, a)
145
146static int nilfs_prepare_segment_lock(struct super_block *sb,
147				      struct nilfs_transaction_info *ti)
148{
149	struct nilfs_transaction_info *cur_ti = current->journal_info;
150	void *save = NULL;
151
152	if (cur_ti) {
153		if (cur_ti->ti_magic == NILFS_TI_MAGIC)
154			return ++cur_ti->ti_count;
155
156		/*
157		 * If the journal_info field is occupied by another FS,
158		 * it is saved and will be restored on
159		 * nilfs_transaction_commit().
160		 */
161		nilfs_warn(sb, "journal info from a different FS");
162		save = current->journal_info;
163	}
164	if (!ti) {
165		ti = kmem_cache_alloc(nilfs_transaction_cachep, GFP_NOFS);
166		if (!ti)
167			return -ENOMEM;
168		ti->ti_flags = NILFS_TI_DYNAMIC_ALLOC;
169	} else {
170		ti->ti_flags = 0;
171	}
172	ti->ti_count = 0;
173	ti->ti_save = save;
174	ti->ti_magic = NILFS_TI_MAGIC;
175	current->journal_info = ti;
176	return 0;
177}
178
179/**
180 * nilfs_transaction_begin - start indivisible file operations.
181 * @sb: super block
182 * @ti: nilfs_transaction_info
183 * @vacancy_check: flags for vacancy rate checks
184 *
185 * nilfs_transaction_begin() acquires a reader/writer semaphore, called
186 * the segment semaphore, to make segment construction and write tasks
187 * exclusive.  The function is used with nilfs_transaction_commit() in pairs.
188 * The region enclosed by these two functions can be nested.  To avoid a
189 * deadlock, the semaphore is only acquired or released in the outermost call.
190 *
191 * This function allocates a nilfs_transaction_info struct to keep context
192 * information on it.  It is initialized and hooked onto the current task in
193 * the outermost call.  If a pre-allocated struct is given to @ti, it is used
194 * instead; otherwise a new struct is assigned from a slab.
195 *
196 * When the @vacancy_check flag is set, this function will check the amount of
197 * free space, and will wait for the GC to reclaim disk space if capacity is low.
198 *
199 * Return Value: On success, 0 is returned. On error, one of the following
200 * negative error codes is returned.
201 *
202 * %-ENOMEM - Insufficient memory available.
203 *
204 * %-ENOSPC - No space left on device
205 */
206int nilfs_transaction_begin(struct super_block *sb,
207			    struct nilfs_transaction_info *ti,
208			    int vacancy_check)
209{
210	struct the_nilfs *nilfs;
211	int ret = nilfs_prepare_segment_lock(sb, ti);
212	struct nilfs_transaction_info *trace_ti;
213
214	if (unlikely(ret < 0))
215		return ret;
216	if (ret > 0) {
217		trace_ti = current->journal_info;
218
219		trace_nilfs2_transaction_transition(sb, trace_ti,
220				    trace_ti->ti_count, trace_ti->ti_flags,
221				    TRACE_NILFS2_TRANSACTION_BEGIN);
222		return 0;
223	}
224
225	sb_start_intwrite(sb);
226
227	nilfs = sb->s_fs_info;
228	down_read(&nilfs->ns_segctor_sem);
229	if (vacancy_check && nilfs_near_disk_full(nilfs)) {
230		up_read(&nilfs->ns_segctor_sem);
231		ret = -ENOSPC;
232		goto failed;
233	}
234
235	trace_ti = current->journal_info;
236	trace_nilfs2_transaction_transition(sb, trace_ti, trace_ti->ti_count,
237					    trace_ti->ti_flags,
238					    TRACE_NILFS2_TRANSACTION_BEGIN);
239	return 0;
240
241 failed:
242	ti = current->journal_info;
243	current->journal_info = ti->ti_save;
244	if (ti->ti_flags & NILFS_TI_DYNAMIC_ALLOC)
245		kmem_cache_free(nilfs_transaction_cachep, ti);
246	sb_end_intwrite(sb);
247	return ret;
248}
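/*
 * Typical usage (a sketch following the description above):
 *
 *	struct nilfs_transaction_info ti;
 *	int err;
 *
 *	err = nilfs_transaction_begin(sb, &ti, 1);
 *	if (err)
 *		return err;
 *	... modify blocks ...
 *	err = nilfs_transaction_commit(sb);
 *
 * with nilfs_transaction_abort(sb) called instead of the commit on error
 * paths, so that the segment semaphore is released in every case.
 */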
249
250/**
251 * nilfs_transaction_commit - commit indivisible file operations.
252 * @sb: super block
253 *
254 * nilfs_transaction_commit() releases the read semaphore which is
255 * acquired by nilfs_transaction_begin(). This is only performed
256 * in the outermost call of this function.  If a commit flag is set,
257 * nilfs_transaction_commit() sets a timer to start the segment
258 * constructor.  If a sync flag is set, it starts construction
259 * directly.
260 */
261int nilfs_transaction_commit(struct super_block *sb)
262{
263	struct nilfs_transaction_info *ti = current->journal_info;
264	struct the_nilfs *nilfs = sb->s_fs_info;
265	int err = 0;
266
267	BUG_ON(ti == NULL || ti->ti_magic != NILFS_TI_MAGIC);
268	ti->ti_flags |= NILFS_TI_COMMIT;
269	if (ti->ti_count > 0) {
270		ti->ti_count--;
271		trace_nilfs2_transaction_transition(sb, ti, ti->ti_count,
272			    ti->ti_flags, TRACE_NILFS2_TRANSACTION_COMMIT);
273		return 0;
274	}
275	if (nilfs->ns_writer) {
276		struct nilfs_sc_info *sci = nilfs->ns_writer;
277
278		if (ti->ti_flags & NILFS_TI_COMMIT)
279			nilfs_segctor_start_timer(sci);
280		if (atomic_read(&nilfs->ns_ndirtyblks) > sci->sc_watermark)
281			nilfs_segctor_do_flush(sci, 0);
282	}
283	up_read(&nilfs->ns_segctor_sem);
284	trace_nilfs2_transaction_transition(sb, ti, ti->ti_count,
285			    ti->ti_flags, TRACE_NILFS2_TRANSACTION_COMMIT);
286
287	current->journal_info = ti->ti_save;
288
289	if (ti->ti_flags & NILFS_TI_SYNC)
290		err = nilfs_construct_segment(sb);
291	if (ti->ti_flags & NILFS_TI_DYNAMIC_ALLOC)
292		kmem_cache_free(nilfs_transaction_cachep, ti);
293	sb_end_intwrite(sb);
294	return err;
295}
296
297void nilfs_transaction_abort(struct super_block *sb)
298{
299	struct nilfs_transaction_info *ti = current->journal_info;
300	struct the_nilfs *nilfs = sb->s_fs_info;
301
302	BUG_ON(ti == NULL || ti->ti_magic != NILFS_TI_MAGIC);
303	if (ti->ti_count > 0) {
304		ti->ti_count--;
305		trace_nilfs2_transaction_transition(sb, ti, ti->ti_count,
306			    ti->ti_flags, TRACE_NILFS2_TRANSACTION_ABORT);
307		return;
308	}
309	up_read(&nilfs->ns_segctor_sem);
310
311	trace_nilfs2_transaction_transition(sb, ti, ti->ti_count,
312		    ti->ti_flags, TRACE_NILFS2_TRANSACTION_ABORT);
313
314	current->journal_info = ti->ti_save;
315	if (ti->ti_flags & NILFS_TI_DYNAMIC_ALLOC)
316		kmem_cache_free(nilfs_transaction_cachep, ti);
317	sb_end_intwrite(sb);
318}
319
320void nilfs_relax_pressure_in_lock(struct super_block *sb)
321{
322	struct the_nilfs *nilfs = sb->s_fs_info;
323	struct nilfs_sc_info *sci = nilfs->ns_writer;
324
325	if (sb_rdonly(sb) || unlikely(!sci) || !sci->sc_flush_request)
326		return;
327
328	set_bit(NILFS_SC_PRIOR_FLUSH, &sci->sc_flags);
329	up_read(&nilfs->ns_segctor_sem);
330
331	down_write(&nilfs->ns_segctor_sem);
332	if (sci->sc_flush_request &&
333	    test_bit(NILFS_SC_PRIOR_FLUSH, &sci->sc_flags)) {
334		struct nilfs_transaction_info *ti = current->journal_info;
335
336		ti->ti_flags |= NILFS_TI_WRITER;
337		nilfs_segctor_do_immediate_flush(sci);
338		ti->ti_flags &= ~NILFS_TI_WRITER;
339	}
340	downgrade_write(&nilfs->ns_segctor_sem);
341}
342
343static void nilfs_transaction_lock(struct super_block *sb,
344				   struct nilfs_transaction_info *ti,
345				   int gcflag)
346{
347	struct nilfs_transaction_info *cur_ti = current->journal_info;
348	struct the_nilfs *nilfs = sb->s_fs_info;
349	struct nilfs_sc_info *sci = nilfs->ns_writer;
350
351	WARN_ON(cur_ti);
352	ti->ti_flags = NILFS_TI_WRITER;
353	ti->ti_count = 0;
354	ti->ti_save = cur_ti;
355	ti->ti_magic = NILFS_TI_MAGIC;
356	current->journal_info = ti;
357
358	for (;;) {
359		trace_nilfs2_transaction_transition(sb, ti, ti->ti_count,
360			    ti->ti_flags, TRACE_NILFS2_TRANSACTION_TRYLOCK);
361
362		down_write(&nilfs->ns_segctor_sem);
363		if (!test_bit(NILFS_SC_PRIOR_FLUSH, &sci->sc_flags))
364			break;
365
366		nilfs_segctor_do_immediate_flush(sci);
367
368		up_write(&nilfs->ns_segctor_sem);
369		cond_resched();
370	}
371	if (gcflag)
372		ti->ti_flags |= NILFS_TI_GC;
373
374	trace_nilfs2_transaction_transition(sb, ti, ti->ti_count,
375			    ti->ti_flags, TRACE_NILFS2_TRANSACTION_LOCK);
376}
377
378static void nilfs_transaction_unlock(struct super_block *sb)
379{
380	struct nilfs_transaction_info *ti = current->journal_info;
381	struct the_nilfs *nilfs = sb->s_fs_info;
382
383	BUG_ON(ti == NULL || ti->ti_magic != NILFS_TI_MAGIC);
384	BUG_ON(ti->ti_count > 0);
385
386	up_write(&nilfs->ns_segctor_sem);
387	current->journal_info = ti->ti_save;
388
389	trace_nilfs2_transaction_transition(sb, ti, ti->ti_count,
390			    ti->ti_flags, TRACE_NILFS2_TRANSACTION_UNLOCK);
391}
392
393static void *nilfs_segctor_map_segsum_entry(struct nilfs_sc_info *sci,
394					    struct nilfs_segsum_pointer *ssp,
395					    unsigned int bytes)
396{
397	struct nilfs_segment_buffer *segbuf = sci->sc_curseg;
398	unsigned int blocksize = sci->sc_super->s_blocksize;
399	void *p;
400
401	if (unlikely(ssp->offset + bytes > blocksize)) {
402		ssp->offset = 0;
403		BUG_ON(NILFS_SEGBUF_BH_IS_LAST(ssp->bh,
404					       &segbuf->sb_segsum_buffers));
405		ssp->bh = NILFS_SEGBUF_NEXT_BH(ssp->bh);
406	}
407	p = ssp->bh->b_data + ssp->offset;
408	ssp->offset += bytes;
409	return p;
410}
411
412/**
413 * nilfs_segctor_reset_segment_buffer - reset the current segment buffer
414 * @sci: nilfs_sc_info
415 */
416static int nilfs_segctor_reset_segment_buffer(struct nilfs_sc_info *sci)
417{
418	struct nilfs_segment_buffer *segbuf = sci->sc_curseg;
419	struct buffer_head *sumbh;
420	unsigned int sumbytes;
421	unsigned int flags = 0;
422	int err;
423
424	if (nilfs_doing_gc())
425		flags = NILFS_SS_GC;
426	err = nilfs_segbuf_reset(segbuf, flags, sci->sc_seg_ctime, sci->sc_cno);
427	if (unlikely(err))
428		return err;
429
430	sumbh = NILFS_SEGBUF_FIRST_BH(&segbuf->sb_segsum_buffers);
431	sumbytes = segbuf->sb_sum.sumbytes;
432	sci->sc_finfo_ptr.bh = sumbh;  sci->sc_finfo_ptr.offset = sumbytes;
433	sci->sc_binfo_ptr.bh = sumbh;  sci->sc_binfo_ptr.offset = sumbytes;
434	sci->sc_blk_cnt = sci->sc_datablk_cnt = 0;
435	return 0;
436}
437
438/**
439 * nilfs_segctor_zeropad_segsum - zero pad the rest of the segment summary area
440 * @sci: segment constructor object
441 *
442 * nilfs_segctor_zeropad_segsum() zero-fills unallocated space at the end of
443 * the current segment summary block.
444 */
445static void nilfs_segctor_zeropad_segsum(struct nilfs_sc_info *sci)
446{
447	struct nilfs_segsum_pointer *ssp;
448
449	ssp = sci->sc_blk_cnt > 0 ? &sci->sc_binfo_ptr : &sci->sc_finfo_ptr;
450	if (ssp->offset < ssp->bh->b_size)
451		memset(ssp->bh->b_data + ssp->offset, 0,
452		       ssp->bh->b_size - ssp->offset);
453}
454
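/*
 * nilfs_segctor_feed_segment - switch to the next segment buffer
 *
 * Accounts the blocks of the current segment buffer, zero-pads the rest of
 * its segment summary, and advances sc_curseg to the next pre-allocated
 * buffer.  Returns -E2BIG as an internal code when the last buffer is
 * already in use; nilfs_segctor_collect() treats this as "segment full"
 * and may extend the buffer list and retry the collection.
 */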
455static int nilfs_segctor_feed_segment(struct nilfs_sc_info *sci)
456{
457	sci->sc_nblk_this_inc += sci->sc_curseg->sb_sum.nblocks;
458	if (NILFS_SEGBUF_IS_LAST(sci->sc_curseg, &sci->sc_segbufs))
459		return -E2BIG; /*
460				* The current segment is filled up
461				* (internal code)
462				*/
463	nilfs_segctor_zeropad_segsum(sci);
464	sci->sc_curseg = NILFS_NEXT_SEGBUF(sci->sc_curseg);
465	return nilfs_segctor_reset_segment_buffer(sci);
466}
467
468static int nilfs_segctor_add_super_root(struct nilfs_sc_info *sci)
469{
470	struct nilfs_segment_buffer *segbuf = sci->sc_curseg;
471	int err;
472
473	if (segbuf->sb_sum.nblocks >= segbuf->sb_rest_blocks) {
474		err = nilfs_segctor_feed_segment(sci);
475		if (err)
476			return err;
477		segbuf = sci->sc_curseg;
478	}
479	err = nilfs_segbuf_extend_payload(segbuf, &segbuf->sb_super_root);
480	if (likely(!err))
481		segbuf->sb_sum.flags |= NILFS_SS_SR;
482	return err;
483}
484
485/*
486 * Functions for making segment summary and payloads
487 */
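/*
 * nilfs_segctor_segsum_block_required - check whether a new summary block
 * is needed
 *
 * Returns nonzero when appending a binfo entry of @binfo_size bytes (plus a
 * finfo header if this would be the first block collected for the file,
 * i.e. sc_blk_cnt == 0) would overflow the current segment summary block.
 */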
488static int nilfs_segctor_segsum_block_required(
489	struct nilfs_sc_info *sci, const struct nilfs_segsum_pointer *ssp,
490	unsigned int binfo_size)
491{
492	unsigned int blocksize = sci->sc_super->s_blocksize;
493	/* The sizes of finfo and binfo are small enough relative to the blocksize */
494
495	return ssp->offset + binfo_size +
496		(!sci->sc_blk_cnt ? sizeof(struct nilfs_finfo) : 0) >
497		blocksize;
498}
499
500static void nilfs_segctor_begin_finfo(struct nilfs_sc_info *sci,
501				      struct inode *inode)
502{
503	sci->sc_curseg->sb_sum.nfinfo++;
504	sci->sc_binfo_ptr = sci->sc_finfo_ptr;
505	nilfs_segctor_map_segsum_entry(
506		sci, &sci->sc_binfo_ptr, sizeof(struct nilfs_finfo));
507
508	if (NILFS_I(inode)->i_root &&
509	    !test_bit(NILFS_SC_HAVE_DELTA, &sci->sc_flags))
510		set_bit(NILFS_SC_HAVE_DELTA, &sci->sc_flags);
511	/* skip finfo */
512}
513
514static void nilfs_segctor_end_finfo(struct nilfs_sc_info *sci,
515				    struct inode *inode)
516{
517	struct nilfs_finfo *finfo;
518	struct nilfs_inode_info *ii;
519	struct nilfs_segment_buffer *segbuf;
520	__u64 cno;
521
522	if (sci->sc_blk_cnt == 0)
523		return;
524
525	ii = NILFS_I(inode);
526
527	if (test_bit(NILFS_I_GCINODE, &ii->i_state))
528		cno = ii->i_cno;
529	else if (NILFS_ROOT_METADATA_FILE(inode->i_ino))
530		cno = 0;
531	else
532		cno = sci->sc_cno;
533
534	finfo = nilfs_segctor_map_segsum_entry(sci, &sci->sc_finfo_ptr,
535						 sizeof(*finfo));
536	finfo->fi_ino = cpu_to_le64(inode->i_ino);
537	finfo->fi_nblocks = cpu_to_le32(sci->sc_blk_cnt);
538	finfo->fi_ndatablk = cpu_to_le32(sci->sc_datablk_cnt);
539	finfo->fi_cno = cpu_to_le64(cno);
540
541	segbuf = sci->sc_curseg;
542	segbuf->sb_sum.sumbytes = sci->sc_binfo_ptr.offset +
543		sci->sc_super->s_blocksize * (segbuf->sb_sum.nsumblk - 1);
544	sci->sc_finfo_ptr = sci->sc_binfo_ptr;
545	sci->sc_blk_cnt = sci->sc_datablk_cnt = 0;
546}
547
548static int nilfs_segctor_add_file_block(struct nilfs_sc_info *sci,
549					struct buffer_head *bh,
550					struct inode *inode,
551					unsigned int binfo_size)
552{
553	struct nilfs_segment_buffer *segbuf;
554	int required, err = 0;
555
556 retry:
557	segbuf = sci->sc_curseg;
558	required = nilfs_segctor_segsum_block_required(
559		sci, &sci->sc_binfo_ptr, binfo_size);
560	if (segbuf->sb_sum.nblocks + required + 1 > segbuf->sb_rest_blocks) {
561		nilfs_segctor_end_finfo(sci, inode);
562		err = nilfs_segctor_feed_segment(sci);
563		if (err)
564			return err;
565		goto retry;
566	}
567	if (unlikely(required)) {
568		nilfs_segctor_zeropad_segsum(sci);
569		err = nilfs_segbuf_extend_segsum(segbuf);
570		if (unlikely(err))
571			goto failed;
572	}
573	if (sci->sc_blk_cnt == 0)
574		nilfs_segctor_begin_finfo(sci, inode);
575
576	nilfs_segctor_map_segsum_entry(sci, &sci->sc_binfo_ptr, binfo_size);
577	/* Substitution to vblocknr is delayed until update_blocknr() */
578	nilfs_segbuf_add_file_buffer(segbuf, bh);
579	sci->sc_blk_cnt++;
580 failed:
581	return err;
582}
583
584/*
585 * Callback functions that enumerate, mark, and collect dirty blocks
586 */
587static int nilfs_collect_file_data(struct nilfs_sc_info *sci,
588				   struct buffer_head *bh, struct inode *inode)
589{
590	int err;
591
592	err = nilfs_bmap_propagate(NILFS_I(inode)->i_bmap, bh);
593	if (err < 0)
594		return err;
595
596	err = nilfs_segctor_add_file_block(sci, bh, inode,
597					   sizeof(struct nilfs_binfo_v));
598	if (!err)
599		sci->sc_datablk_cnt++;
600	return err;
601}
602
603static int nilfs_collect_file_node(struct nilfs_sc_info *sci,
604				   struct buffer_head *bh,
605				   struct inode *inode)
606{
607	return nilfs_bmap_propagate(NILFS_I(inode)->i_bmap, bh);
608}
609
610static int nilfs_collect_file_bmap(struct nilfs_sc_info *sci,
611				   struct buffer_head *bh,
612				   struct inode *inode)
613{
614	WARN_ON(!buffer_dirty(bh));
615	return nilfs_segctor_add_file_block(sci, bh, inode, sizeof(__le64));
616}
617
618static void nilfs_write_file_data_binfo(struct nilfs_sc_info *sci,
619					struct nilfs_segsum_pointer *ssp,
620					union nilfs_binfo *binfo)
621{
622	struct nilfs_binfo_v *binfo_v = nilfs_segctor_map_segsum_entry(
623		sci, ssp, sizeof(*binfo_v));
624	*binfo_v = binfo->bi_v;
625}
626
627static void nilfs_write_file_node_binfo(struct nilfs_sc_info *sci,
628					struct nilfs_segsum_pointer *ssp,
629					union nilfs_binfo *binfo)
630{
631	__le64 *vblocknr = nilfs_segctor_map_segsum_entry(
632		sci, ssp, sizeof(*vblocknr));
633	*vblocknr = binfo->bi_v.bi_vblocknr;
634}
635
636static const struct nilfs_sc_operations nilfs_sc_file_ops = {
637	.collect_data = nilfs_collect_file_data,
638	.collect_node = nilfs_collect_file_node,
639	.collect_bmap = nilfs_collect_file_bmap,
640	.write_data_binfo = nilfs_write_file_data_binfo,
641	.write_node_binfo = nilfs_write_file_node_binfo,
642};
643
644static int nilfs_collect_dat_data(struct nilfs_sc_info *sci,
645				  struct buffer_head *bh, struct inode *inode)
646{
647	int err;
648
649	err = nilfs_bmap_propagate(NILFS_I(inode)->i_bmap, bh);
650	if (err < 0)
651		return err;
652
653	err = nilfs_segctor_add_file_block(sci, bh, inode, sizeof(__le64));
654	if (!err)
655		sci->sc_datablk_cnt++;
656	return err;
657}
658
659static int nilfs_collect_dat_bmap(struct nilfs_sc_info *sci,
660				  struct buffer_head *bh, struct inode *inode)
661{
662	WARN_ON(!buffer_dirty(bh));
663	return nilfs_segctor_add_file_block(sci, bh, inode,
664					    sizeof(struct nilfs_binfo_dat));
665}
666
667static void nilfs_write_dat_data_binfo(struct nilfs_sc_info *sci,
668				       struct nilfs_segsum_pointer *ssp,
669				       union nilfs_binfo *binfo)
670{
671	__le64 *blkoff = nilfs_segctor_map_segsum_entry(sci, ssp,
672							  sizeof(*blkoff));
673	*blkoff = binfo->bi_dat.bi_blkoff;
674}
675
676static void nilfs_write_dat_node_binfo(struct nilfs_sc_info *sci,
677				       struct nilfs_segsum_pointer *ssp,
678				       union nilfs_binfo *binfo)
679{
680	struct nilfs_binfo_dat *binfo_dat =
681		nilfs_segctor_map_segsum_entry(sci, ssp, sizeof(*binfo_dat));
682	*binfo_dat = binfo->bi_dat;
683}
684
685static const struct nilfs_sc_operations nilfs_sc_dat_ops = {
686	.collect_data = nilfs_collect_dat_data,
687	.collect_node = nilfs_collect_file_node,
688	.collect_bmap = nilfs_collect_dat_bmap,
689	.write_data_binfo = nilfs_write_dat_data_binfo,
690	.write_node_binfo = nilfs_write_dat_node_binfo,
691};
692
693static const struct nilfs_sc_operations nilfs_sc_dsync_ops = {
694	.collect_data = nilfs_collect_file_data,
695	.collect_node = NULL,
696	.collect_bmap = NULL,
697	.write_data_binfo = nilfs_write_file_data_binfo,
698	.write_node_binfo = NULL,
699};
700
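/*
 * nilfs_lookup_dirty_data_buffers - collect dirty data buffers of an inode
 *
 * Scans the dirty pages of @inode within the byte range [@start, @end],
 * creates buffers for pages that have none yet, and moves dirty buffers
 * that are not already under async write onto @listp, taking a reference
 * on each.  Returns the number of buffers collected; the scan stops early
 * once @nlimit buffers have been gathered, which callers use to detect a
 * full segment.
 */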
701static size_t nilfs_lookup_dirty_data_buffers(struct inode *inode,
702					      struct list_head *listp,
703					      size_t nlimit,
704					      loff_t start, loff_t end)
705{
706	struct address_space *mapping = inode->i_mapping;
707	struct pagevec pvec;
708	pgoff_t index = 0, last = ULONG_MAX;
709	size_t ndirties = 0;
710	int i;
711
712	if (unlikely(start != 0 || end != LLONG_MAX)) {
713		/*
714		 * A valid range is given for sync-ing data pages. The
715		 * range is rounded to page boundaries; extra dirty buffers
716		 * may be included if blocksize < pagesize.
717		 */
718		index = start >> PAGE_SHIFT;
719		last = end >> PAGE_SHIFT;
720	}
721	pagevec_init(&pvec);
722 repeat:
723	if (unlikely(index > last) ||
724	    !pagevec_lookup_range_tag(&pvec, mapping, &index, last,
725				PAGECACHE_TAG_DIRTY))
726		return ndirties;
727
728	for (i = 0; i < pagevec_count(&pvec); i++) {
729		struct buffer_head *bh, *head;
730		struct page *page = pvec.pages[i];
731
732		lock_page(page);
733		if (unlikely(page->mapping != mapping)) {
734			/* Exclude pages removed from the address space */
735			unlock_page(page);
736			continue;
737		}
738		if (!page_has_buffers(page))
739			create_empty_buffers(page, i_blocksize(inode), 0);
740		unlock_page(page);
741
742		bh = head = page_buffers(page);
743		do {
744			if (!buffer_dirty(bh) || buffer_async_write(bh))
745				continue;
746			get_bh(bh);
747			list_add_tail(&bh->b_assoc_buffers, listp);
748			ndirties++;
749			if (unlikely(ndirties >= nlimit)) {
750				pagevec_release(&pvec);
751				cond_resched();
752				return ndirties;
753			}
754		} while (bh = bh->b_this_page, bh != head);
755	}
756	pagevec_release(&pvec);
757	cond_resched();
758	goto repeat;
759}
760
761static void nilfs_lookup_dirty_node_buffers(struct inode *inode,
762					    struct list_head *listp)
763{
764	struct nilfs_inode_info *ii = NILFS_I(inode);
765	struct inode *btnc_inode = ii->i_assoc_inode;
766	struct pagevec pvec;
767	struct buffer_head *bh, *head;
768	unsigned int i;
769	pgoff_t index = 0;
770
771	if (!btnc_inode)
772		return;
773
774	pagevec_init(&pvec);
775
776	while (pagevec_lookup_tag(&pvec, btnc_inode->i_mapping, &index,
777					PAGECACHE_TAG_DIRTY)) {
778		for (i = 0; i < pagevec_count(&pvec); i++) {
779			bh = head = page_buffers(pvec.pages[i]);
780			do {
781				if (buffer_dirty(bh) &&
782						!buffer_async_write(bh)) {
783					get_bh(bh);
784					list_add_tail(&bh->b_assoc_buffers,
785						      listp);
786				}
787				bh = bh->b_this_page;
788			} while (bh != head);
789		}
790		pagevec_release(&pvec);
791		cond_resched();
792	}
793}
794
795static void nilfs_dispose_list(struct the_nilfs *nilfs,
796			       struct list_head *head, int force)
797{
798	struct nilfs_inode_info *ii, *n;
799	struct nilfs_inode_info *ivec[SC_N_INODEVEC], **pii;
800	unsigned int nv = 0;
801
802	while (!list_empty(head)) {
803		spin_lock(&nilfs->ns_inode_lock);
804		list_for_each_entry_safe(ii, n, head, i_dirty) {
805			list_del_init(&ii->i_dirty);
806			if (force) {
807				if (unlikely(ii->i_bh)) {
808					brelse(ii->i_bh);
809					ii->i_bh = NULL;
810				}
811			} else if (test_bit(NILFS_I_DIRTY, &ii->i_state)) {
812				set_bit(NILFS_I_QUEUED, &ii->i_state);
813				list_add_tail(&ii->i_dirty,
814					      &nilfs->ns_dirty_files);
815				continue;
816			}
817			ivec[nv++] = ii;
818			if (nv == SC_N_INODEVEC)
819				break;
820		}
821		spin_unlock(&nilfs->ns_inode_lock);
822
823		for (pii = ivec; nv > 0; pii++, nv--)
824			iput(&(*pii)->vfs_inode);
825	}
826}
827
828static void nilfs_iput_work_func(struct work_struct *work)
829{
830	struct nilfs_sc_info *sci = container_of(work, struct nilfs_sc_info,
831						 sc_iput_work);
832	struct the_nilfs *nilfs = sci->sc_super->s_fs_info;
833
834	nilfs_dispose_list(nilfs, &sci->sc_iput_queue, 0);
835}
836
837static int nilfs_test_metadata_dirty(struct the_nilfs *nilfs,
838				     struct nilfs_root *root)
839{
840	int ret = 0;
841
842	if (nilfs_mdt_fetch_dirty(root->ifile))
843		ret++;
844	if (nilfs_mdt_fetch_dirty(nilfs->ns_cpfile))
845		ret++;
846	if (nilfs_mdt_fetch_dirty(nilfs->ns_sufile))
847		ret++;
848	if ((ret || nilfs_doing_gc()) && nilfs_mdt_fetch_dirty(nilfs->ns_dat))
849		ret++;
850	return ret;
851}
852
853static int nilfs_segctor_clean(struct nilfs_sc_info *sci)
854{
855	return list_empty(&sci->sc_dirty_files) &&
856		!test_bit(NILFS_SC_DIRTY, &sci->sc_flags) &&
857		sci->sc_nfreesegs == 0 &&
858		(!nilfs_doing_gc() || list_empty(&sci->sc_gc_inodes));
859}
860
861static int nilfs_segctor_confirm(struct nilfs_sc_info *sci)
862{
863	struct the_nilfs *nilfs = sci->sc_super->s_fs_info;
864	int ret = 0;
865
866	if (nilfs_test_metadata_dirty(nilfs, sci->sc_root))
867		set_bit(NILFS_SC_DIRTY, &sci->sc_flags);
868
869	spin_lock(&nilfs->ns_inode_lock);
870	if (list_empty(&nilfs->ns_dirty_files) && nilfs_segctor_clean(sci))
871		ret++;
872
873	spin_unlock(&nilfs->ns_inode_lock);
874	return ret;
875}
876
877static void nilfs_segctor_clear_metadata_dirty(struct nilfs_sc_info *sci)
878{
879	struct the_nilfs *nilfs = sci->sc_super->s_fs_info;
880
881	nilfs_mdt_clear_dirty(sci->sc_root->ifile);
882	nilfs_mdt_clear_dirty(nilfs->ns_cpfile);
883	nilfs_mdt_clear_dirty(nilfs->ns_sufile);
884	nilfs_mdt_clear_dirty(nilfs->ns_dat);
885}
886
887static int nilfs_segctor_create_checkpoint(struct nilfs_sc_info *sci)
888{
889	struct the_nilfs *nilfs = sci->sc_super->s_fs_info;
890	struct buffer_head *bh_cp;
891	struct nilfs_checkpoint *raw_cp;
892	int err;
893
894	/* XXX: this interface will be changed */
895	err = nilfs_cpfile_get_checkpoint(nilfs->ns_cpfile, nilfs->ns_cno, 1,
896					  &raw_cp, &bh_cp);
897	if (likely(!err)) {
898		/*
899		 * The following code duplicates logic in cpfile, but it is
900		 * needed to collect the checkpoint even if it was not newly
901		 * created.
902		 */
903		mark_buffer_dirty(bh_cp);
904		nilfs_mdt_mark_dirty(nilfs->ns_cpfile);
905		nilfs_cpfile_put_checkpoint(
906			nilfs->ns_cpfile, nilfs->ns_cno, bh_cp);
907	} else if (err == -EINVAL || err == -ENOENT) {
908		nilfs_error(sci->sc_super,
909			    "checkpoint creation failed due to metadata corruption.");
910		err = -EIO;
911	}
912	return err;
913}
914
915static int nilfs_segctor_fill_in_checkpoint(struct nilfs_sc_info *sci)
916{
917	struct the_nilfs *nilfs = sci->sc_super->s_fs_info;
918	struct buffer_head *bh_cp;
919	struct nilfs_checkpoint *raw_cp;
920	int err;
921
922	err = nilfs_cpfile_get_checkpoint(nilfs->ns_cpfile, nilfs->ns_cno, 0,
923					  &raw_cp, &bh_cp);
924	if (unlikely(err)) {
925		if (err == -EINVAL || err == -ENOENT) {
926			nilfs_error(sci->sc_super,
927				    "checkpoint finalization failed due to metadata corruption.");
928			err = -EIO;
929		}
930		goto failed_ibh;
931	}
932	raw_cp->cp_snapshot_list.ssl_next = 0;
933	raw_cp->cp_snapshot_list.ssl_prev = 0;
934	raw_cp->cp_inodes_count =
935		cpu_to_le64(atomic64_read(&sci->sc_root->inodes_count));
936	raw_cp->cp_blocks_count =
937		cpu_to_le64(atomic64_read(&sci->sc_root->blocks_count));
938	raw_cp->cp_nblk_inc =
939		cpu_to_le64(sci->sc_nblk_inc + sci->sc_nblk_this_inc);
940	raw_cp->cp_create = cpu_to_le64(sci->sc_seg_ctime);
941	raw_cp->cp_cno = cpu_to_le64(nilfs->ns_cno);
942
943	if (test_bit(NILFS_SC_HAVE_DELTA, &sci->sc_flags))
944		nilfs_checkpoint_clear_minor(raw_cp);
945	else
946		nilfs_checkpoint_set_minor(raw_cp);
947
948	nilfs_write_inode_common(sci->sc_root->ifile,
949				 &raw_cp->cp_ifile_inode, 1);
950	nilfs_cpfile_put_checkpoint(nilfs->ns_cpfile, nilfs->ns_cno, bh_cp);
951	return 0;
952
953 failed_ibh:
954	return err;
955}
956
957static void nilfs_fill_in_file_bmap(struct inode *ifile,
958				    struct nilfs_inode_info *ii)
959
960{
961	struct buffer_head *ibh;
962	struct nilfs_inode *raw_inode;
963
964	if (test_bit(NILFS_I_BMAP, &ii->i_state)) {
965		ibh = ii->i_bh;
966		BUG_ON(!ibh);
967		raw_inode = nilfs_ifile_map_inode(ifile, ii->vfs_inode.i_ino,
968						  ibh);
969		nilfs_bmap_write(ii->i_bmap, raw_inode);
970		nilfs_ifile_unmap_inode(ifile, ii->vfs_inode.i_ino, ibh);
971	}
972}
973
974static void nilfs_segctor_fill_in_file_bmap(struct nilfs_sc_info *sci)
975{
976	struct nilfs_inode_info *ii;
977
978	list_for_each_entry(ii, &sci->sc_dirty_files, i_dirty) {
979		nilfs_fill_in_file_bmap(sci->sc_root->ifile, ii);
980		set_bit(NILFS_I_COLLECTED, &ii->i_state);
981	}
982}
983
984static void nilfs_segctor_fill_in_super_root(struct nilfs_sc_info *sci,
985					     struct the_nilfs *nilfs)
986{
987	struct buffer_head *bh_sr;
988	struct nilfs_super_root *raw_sr;
989	unsigned int isz, srsz;
990
991	bh_sr = NILFS_LAST_SEGBUF(&sci->sc_segbufs)->sb_super_root;
992
993	lock_buffer(bh_sr);
994	raw_sr = (struct nilfs_super_root *)bh_sr->b_data;
995	isz = nilfs->ns_inode_size;
996	srsz = NILFS_SR_BYTES(isz);
997
998	raw_sr->sr_sum = 0;  /* Ensure initialization within this update */
999	raw_sr->sr_bytes = cpu_to_le16(srsz);
1000	raw_sr->sr_nongc_ctime
1001		= cpu_to_le64(nilfs_doing_gc() ?
1002			      nilfs->ns_nongc_ctime : sci->sc_seg_ctime);
1003	raw_sr->sr_flags = 0;
1004
1005	nilfs_write_inode_common(nilfs->ns_dat, (void *)raw_sr +
1006				 NILFS_SR_DAT_OFFSET(isz), 1);
1007	nilfs_write_inode_common(nilfs->ns_cpfile, (void *)raw_sr +
1008				 NILFS_SR_CPFILE_OFFSET(isz), 1);
1009	nilfs_write_inode_common(nilfs->ns_sufile, (void *)raw_sr +
1010				 NILFS_SR_SUFILE_OFFSET(isz), 1);
1011	memset((void *)raw_sr + srsz, 0, nilfs->ns_blocksize - srsz);
1012	set_buffer_uptodate(bh_sr);
1013	unlock_buffer(bh_sr);
1014}
1015
1016static void nilfs_redirty_inodes(struct list_head *head)
1017{
1018	struct nilfs_inode_info *ii;
1019
1020	list_for_each_entry(ii, head, i_dirty) {
1021		if (test_bit(NILFS_I_COLLECTED, &ii->i_state))
1022			clear_bit(NILFS_I_COLLECTED, &ii->i_state);
1023	}
1024}
1025
1026static void nilfs_drop_collected_inodes(struct list_head *head)
1027{
1028	struct nilfs_inode_info *ii;
1029
1030	list_for_each_entry(ii, head, i_dirty) {
1031		if (!test_and_clear_bit(NILFS_I_COLLECTED, &ii->i_state))
1032			continue;
1033
1034		clear_bit(NILFS_I_INODE_SYNC, &ii->i_state);
1035		set_bit(NILFS_I_UPDATED, &ii->i_state);
1036	}
1037}
1038
1039static int nilfs_segctor_apply_buffers(struct nilfs_sc_info *sci,
1040				       struct inode *inode,
1041				       struct list_head *listp,
1042				       int (*collect)(struct nilfs_sc_info *,
1043						      struct buffer_head *,
1044						      struct inode *))
1045{
1046	struct buffer_head *bh, *n;
1047	int err = 0;
1048
1049	if (collect) {
1050		list_for_each_entry_safe(bh, n, listp, b_assoc_buffers) {
1051			list_del_init(&bh->b_assoc_buffers);
1052			err = collect(sci, bh, inode);
1053			brelse(bh);
1054			if (unlikely(err))
1055				goto dispose_buffers;
1056		}
1057		return 0;
1058	}
1059
1060 dispose_buffers:
1061	while (!list_empty(listp)) {
1062		bh = list_first_entry(listp, struct buffer_head,
1063				      b_assoc_buffers);
1064		list_del_init(&bh->b_assoc_buffers);
1065		brelse(bh);
1066	}
1067	return err;
1068}
1069
1070static size_t nilfs_segctor_buffer_rest(struct nilfs_sc_info *sci)
1071{
1072	/* Remaining number of blocks within segment buffer */
1073	return sci->sc_segbuf_nblocks -
1074		(sci->sc_nblk_this_inc + sci->sc_curseg->sb_sum.nblocks);
1075}
1076
1077static int nilfs_segctor_scan_file(struct nilfs_sc_info *sci,
1078				   struct inode *inode,
1079				   const struct nilfs_sc_operations *sc_ops)
1080{
1081	LIST_HEAD(data_buffers);
1082	LIST_HEAD(node_buffers);
1083	int err;
1084
1085	if (!(sci->sc_stage.flags & NILFS_CF_NODE)) {
1086		size_t n, rest = nilfs_segctor_buffer_rest(sci);
1087
1088		n = nilfs_lookup_dirty_data_buffers(
1089			inode, &data_buffers, rest + 1, 0, LLONG_MAX);
1090		if (n > rest) {
1091			err = nilfs_segctor_apply_buffers(
1092				sci, inode, &data_buffers,
1093				sc_ops->collect_data);
1094			BUG_ON(!err); /* always receive -E2BIG or true error */
1095			goto break_or_fail;
1096		}
1097	}
1098	nilfs_lookup_dirty_node_buffers(inode, &node_buffers);
1099
1100	if (!(sci->sc_stage.flags & NILFS_CF_NODE)) {
1101		err = nilfs_segctor_apply_buffers(
1102			sci, inode, &data_buffers, sc_ops->collect_data);
1103		if (unlikely(err)) {
1104			/* dispose node list */
1105			nilfs_segctor_apply_buffers(
1106				sci, inode, &node_buffers, NULL);
1107			goto break_or_fail;
1108		}
1109		sci->sc_stage.flags |= NILFS_CF_NODE;
1110	}
1111	/* Collect node */
1112	err = nilfs_segctor_apply_buffers(
1113		sci, inode, &node_buffers, sc_ops->collect_node);
1114	if (unlikely(err))
1115		goto break_or_fail;
1116
1117	nilfs_bmap_lookup_dirty_buffers(NILFS_I(inode)->i_bmap, &node_buffers);
1118	err = nilfs_segctor_apply_buffers(
1119		sci, inode, &node_buffers, sc_ops->collect_bmap);
1120	if (unlikely(err))
1121		goto break_or_fail;
1122
1123	nilfs_segctor_end_finfo(sci, inode);
1124	sci->sc_stage.flags &= ~NILFS_CF_NODE;
1125
1126 break_or_fail:
1127	return err;
1128}
1129
1130static int nilfs_segctor_scan_file_dsync(struct nilfs_sc_info *sci,
1131					 struct inode *inode)
1132{
1133	LIST_HEAD(data_buffers);
1134	size_t n, rest = nilfs_segctor_buffer_rest(sci);
1135	int err;
1136
1137	n = nilfs_lookup_dirty_data_buffers(inode, &data_buffers, rest + 1,
1138					    sci->sc_dsync_start,
1139					    sci->sc_dsync_end);
1140
1141	err = nilfs_segctor_apply_buffers(sci, inode, &data_buffers,
1142					  nilfs_collect_file_data);
1143	if (!err) {
1144		nilfs_segctor_end_finfo(sci, inode);
1145		BUG_ON(n > rest);
1146		/* always receive -E2BIG or true error if n > rest */
1147	}
1148	return err;
1149}
1150
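/*
 * nilfs_segctor_collect_blocks - run the dirty block collection stages
 *
 * Advances sci->sc_stage through the NILFS_ST_* stages appropriate for the
 * construction @mode: GC inodes, dirty files, then the ifile, cpfile,
 * sufile and DAT metadata files, and finally the super root for SC_LSEG_SR.
 * On failure (including the internal -E2BIG "segment full" code) the
 * position reached is kept in sc_stage (e.g. dirty_file_ptr, gc_inode_ptr)
 * so that a later call can continue from where collection stopped.
 */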
1151static int nilfs_segctor_collect_blocks(struct nilfs_sc_info *sci, int mode)
1152{
1153	struct the_nilfs *nilfs = sci->sc_super->s_fs_info;
1154	struct list_head *head;
1155	struct nilfs_inode_info *ii;
1156	size_t ndone;
1157	int err = 0;
1158
1159	switch (nilfs_sc_cstage_get(sci)) {
1160	case NILFS_ST_INIT:
1161		/* Pre-processes */
1162		sci->sc_stage.flags = 0;
1163
1164		if (!test_bit(NILFS_SC_UNCLOSED, &sci->sc_flags)) {
1165			sci->sc_nblk_inc = 0;
1166			sci->sc_curseg->sb_sum.flags = NILFS_SS_LOGBGN;
1167			if (mode == SC_LSEG_DSYNC) {
1168				nilfs_sc_cstage_set(sci, NILFS_ST_DSYNC);
1169				goto dsync_mode;
1170			}
1171		}
1172
1173		sci->sc_stage.dirty_file_ptr = NULL;
1174		sci->sc_stage.gc_inode_ptr = NULL;
1175		if (mode == SC_FLUSH_DAT) {
1176			nilfs_sc_cstage_set(sci, NILFS_ST_DAT);
1177			goto dat_stage;
1178		}
1179		nilfs_sc_cstage_inc(sci);
1180		fallthrough;
1181	case NILFS_ST_GC:
1182		if (nilfs_doing_gc()) {
1183			head = &sci->sc_gc_inodes;
1184			ii = list_prepare_entry(sci->sc_stage.gc_inode_ptr,
1185						head, i_dirty);
1186			list_for_each_entry_continue(ii, head, i_dirty) {
1187				err = nilfs_segctor_scan_file(
1188					sci, &ii->vfs_inode,
1189					&nilfs_sc_file_ops);
1190				if (unlikely(err)) {
1191					sci->sc_stage.gc_inode_ptr = list_entry(
1192						ii->i_dirty.prev,
1193						struct nilfs_inode_info,
1194						i_dirty);
1195					goto break_or_fail;
1196				}
1197				set_bit(NILFS_I_COLLECTED, &ii->i_state);
1198			}
1199			sci->sc_stage.gc_inode_ptr = NULL;
1200		}
1201		nilfs_sc_cstage_inc(sci);
1202		fallthrough;
1203	case NILFS_ST_FILE:
1204		head = &sci->sc_dirty_files;
1205		ii = list_prepare_entry(sci->sc_stage.dirty_file_ptr, head,
1206					i_dirty);
1207		list_for_each_entry_continue(ii, head, i_dirty) {
1208			clear_bit(NILFS_I_DIRTY, &ii->i_state);
1209
1210			err = nilfs_segctor_scan_file(sci, &ii->vfs_inode,
1211						      &nilfs_sc_file_ops);
1212			if (unlikely(err)) {
1213				sci->sc_stage.dirty_file_ptr =
1214					list_entry(ii->i_dirty.prev,
1215						   struct nilfs_inode_info,
1216						   i_dirty);
1217				goto break_or_fail;
1218			}
1219			/* sci->sc_stage.dirty_file_ptr = NILFS_I(inode); */
1220			/* XXX: required ? */
1221		}
1222		sci->sc_stage.dirty_file_ptr = NULL;
1223		if (mode == SC_FLUSH_FILE) {
1224			nilfs_sc_cstage_set(sci, NILFS_ST_DONE);
1225			return 0;
1226		}
1227		nilfs_sc_cstage_inc(sci);
1228		sci->sc_stage.flags |= NILFS_CF_IFILE_STARTED;
1229		fallthrough;
1230	case NILFS_ST_IFILE:
1231		err = nilfs_segctor_scan_file(sci, sci->sc_root->ifile,
1232					      &nilfs_sc_file_ops);
1233		if (unlikely(err))
1234			break;
1235		nilfs_sc_cstage_inc(sci);
1236		/* Creating a checkpoint */
1237		err = nilfs_segctor_create_checkpoint(sci);
1238		if (unlikely(err))
1239			break;
1240		fallthrough;
1241	case NILFS_ST_CPFILE:
1242		err = nilfs_segctor_scan_file(sci, nilfs->ns_cpfile,
1243					      &nilfs_sc_file_ops);
1244		if (unlikely(err))
1245			break;
1246		nilfs_sc_cstage_inc(sci);
1247		fallthrough;
1248	case NILFS_ST_SUFILE:
1249		err = nilfs_sufile_freev(nilfs->ns_sufile, sci->sc_freesegs,
1250					 sci->sc_nfreesegs, &ndone);
1251		if (unlikely(err)) {
1252			nilfs_sufile_cancel_freev(nilfs->ns_sufile,
1253						  sci->sc_freesegs, ndone,
1254						  NULL);
1255			break;
1256		}
1257		sci->sc_stage.flags |= NILFS_CF_SUFREED;
1258
1259		err = nilfs_segctor_scan_file(sci, nilfs->ns_sufile,
1260					      &nilfs_sc_file_ops);
1261		if (unlikely(err))
1262			break;
1263		nilfs_sc_cstage_inc(sci);
1264		fallthrough;
1265	case NILFS_ST_DAT:
1266 dat_stage:
1267		err = nilfs_segctor_scan_file(sci, nilfs->ns_dat,
1268					      &nilfs_sc_dat_ops);
1269		if (unlikely(err))
1270			break;
1271		if (mode == SC_FLUSH_DAT) {
1272			nilfs_sc_cstage_set(sci, NILFS_ST_DONE);
1273			return 0;
1274		}
1275		nilfs_sc_cstage_inc(sci);
1276		fallthrough;
1277	case NILFS_ST_SR:
1278		if (mode == SC_LSEG_SR) {
1279			/* Appending a super root */
1280			err = nilfs_segctor_add_super_root(sci);
1281			if (unlikely(err))
1282				break;
1283		}
1284		/* End of a logical segment */
1285		sci->sc_curseg->sb_sum.flags |= NILFS_SS_LOGEND;
1286		nilfs_sc_cstage_set(sci, NILFS_ST_DONE);
1287		return 0;
1288	case NILFS_ST_DSYNC:
1289 dsync_mode:
1290		sci->sc_curseg->sb_sum.flags |= NILFS_SS_SYNDT;
1291		ii = sci->sc_dsync_inode;
1292		if (!test_bit(NILFS_I_BUSY, &ii->i_state))
1293			break;
1294
1295		err = nilfs_segctor_scan_file_dsync(sci, &ii->vfs_inode);
1296		if (unlikely(err))
1297			break;
1298		sci->sc_curseg->sb_sum.flags |= NILFS_SS_LOGEND;
1299		nilfs_sc_cstage_set(sci, NILFS_ST_DONE);
1300		return 0;
1301	case NILFS_ST_DONE:
1302		return 0;
1303	default:
1304		BUG();
1305	}
1306
1307 break_or_fail:
1308	return err;
1309}
1310
1311/**
1312 * nilfs_segctor_begin_construction - setup segment buffer to make a new log
1313 * @sci: nilfs_sc_info
1314 * @nilfs: nilfs object
1315 */
1316static int nilfs_segctor_begin_construction(struct nilfs_sc_info *sci,
1317					    struct the_nilfs *nilfs)
1318{
1319	struct nilfs_segment_buffer *segbuf, *prev;
1320	__u64 nextnum;
1321	int err, alloc = 0;
1322
1323	segbuf = nilfs_segbuf_new(sci->sc_super);
1324	if (unlikely(!segbuf))
1325		return -ENOMEM;
1326
1327	if (list_empty(&sci->sc_write_logs)) {
1328		nilfs_segbuf_map(segbuf, nilfs->ns_segnum,
1329				 nilfs->ns_pseg_offset, nilfs);
1330		if (segbuf->sb_rest_blocks < NILFS_PSEG_MIN_BLOCKS) {
1331			nilfs_shift_to_next_segment(nilfs);
1332			nilfs_segbuf_map(segbuf, nilfs->ns_segnum, 0, nilfs);
1333		}
1334
1335		segbuf->sb_sum.seg_seq = nilfs->ns_seg_seq;
1336		nextnum = nilfs->ns_nextnum;
1337
1338		if (nilfs->ns_segnum == nilfs->ns_nextnum)
1339			/* Start from the head of a new full segment */
1340			alloc++;
1341	} else {
1342		/* Continue logs */
1343		prev = NILFS_LAST_SEGBUF(&sci->sc_write_logs);
1344		nilfs_segbuf_map_cont(segbuf, prev);
1345		segbuf->sb_sum.seg_seq = prev->sb_sum.seg_seq;
1346		nextnum = prev->sb_nextnum;
1347
1348		if (segbuf->sb_rest_blocks < NILFS_PSEG_MIN_BLOCKS) {
1349			nilfs_segbuf_map(segbuf, prev->sb_nextnum, 0, nilfs);
1350			segbuf->sb_sum.seg_seq++;
1351			alloc++;
1352		}
1353	}
1354
1355	err = nilfs_sufile_mark_dirty(nilfs->ns_sufile, segbuf->sb_segnum);
1356	if (err)
1357		goto failed;
1358
1359	if (alloc) {
1360		err = nilfs_sufile_alloc(nilfs->ns_sufile, &nextnum);
1361		if (err)
1362			goto failed;
1363	}
1364	nilfs_segbuf_set_next_segnum(segbuf, nextnum, nilfs);
1365
1366	BUG_ON(!list_empty(&sci->sc_segbufs));
1367	list_add_tail(&segbuf->sb_list, &sci->sc_segbufs);
1368	sci->sc_segbuf_nblocks = segbuf->sb_rest_blocks;
1369	return 0;
1370
1371 failed:
1372	nilfs_segbuf_free(segbuf);
1373	return err;
1374}
1375
1376static int nilfs_segctor_extend_segments(struct nilfs_sc_info *sci,
1377					 struct the_nilfs *nilfs, int nadd)
1378{
1379	struct nilfs_segment_buffer *segbuf, *prev;
1380	struct inode *sufile = nilfs->ns_sufile;
1381	__u64 nextnextnum;
1382	LIST_HEAD(list);
1383	int err, ret, i;
1384
1385	prev = NILFS_LAST_SEGBUF(&sci->sc_segbufs);
1386	/*
1387	 * Since the segment specified with nextnum might be allocated during
1388	 * the previous construction, the buffer including its segusage may
1389	 * not be dirty.  The following call ensures that the buffer is dirty
1390	 * and will pin the buffer in memory until the sufile is written.
1391	 */
1392	err = nilfs_sufile_mark_dirty(sufile, prev->sb_nextnum);
1393	if (unlikely(err))
1394		return err;
1395
1396	for (i = 0; i < nadd; i++) {
1397		/* extend segment info */
1398		err = -ENOMEM;
1399		segbuf = nilfs_segbuf_new(sci->sc_super);
1400		if (unlikely(!segbuf))
1401			goto failed;
1402
1403		/* map this buffer to region of segment on-disk */
1404		nilfs_segbuf_map(segbuf, prev->sb_nextnum, 0, nilfs);
1405		sci->sc_segbuf_nblocks += segbuf->sb_rest_blocks;
1406
1407		/* allocate the next next full segment */
1408		err = nilfs_sufile_alloc(sufile, &nextnextnum);
1409		if (unlikely(err))
1410			goto failed_segbuf;
1411
1412		segbuf->sb_sum.seg_seq = prev->sb_sum.seg_seq + 1;
1413		nilfs_segbuf_set_next_segnum(segbuf, nextnextnum, nilfs);
1414
1415		list_add_tail(&segbuf->sb_list, &list);
1416		prev = segbuf;
1417	}
1418	list_splice_tail(&list, &sci->sc_segbufs);
1419	return 0;
1420
1421 failed_segbuf:
1422	nilfs_segbuf_free(segbuf);
1423 failed:
1424	list_for_each_entry(segbuf, &list, sb_list) {
1425		ret = nilfs_sufile_free(sufile, segbuf->sb_nextnum);
1426		WARN_ON(ret); /* never fails */
1427	}
1428	nilfs_destroy_logs(&list);
1429	return err;
1430}
1431
1432static void nilfs_free_incomplete_logs(struct list_head *logs,
1433				       struct the_nilfs *nilfs)
1434{
1435	struct nilfs_segment_buffer *segbuf, *prev;
1436	struct inode *sufile = nilfs->ns_sufile;
1437	int ret;
1438
1439	segbuf = NILFS_FIRST_SEGBUF(logs);
1440	if (nilfs->ns_nextnum != segbuf->sb_nextnum) {
1441		ret = nilfs_sufile_free(sufile, segbuf->sb_nextnum);
1442		WARN_ON(ret); /* never fails */
1443	}
1444	if (atomic_read(&segbuf->sb_err)) {
1445		/* Case 1: The first segment failed */
1446		if (segbuf->sb_pseg_start != segbuf->sb_fseg_start)
1447			/*
1448			 * Case 1a:  Partial segment appended into an existing
1449			 * segment
1450			 */
1451			nilfs_terminate_segment(nilfs, segbuf->sb_fseg_start,
1452						segbuf->sb_fseg_end);
1453		else /* Case 1b:  New full segment */
1454			set_nilfs_discontinued(nilfs);
1455	}
1456
1457	prev = segbuf;
1458	list_for_each_entry_continue(segbuf, logs, sb_list) {
1459		if (prev->sb_nextnum != segbuf->sb_nextnum) {
1460			ret = nilfs_sufile_free(sufile, segbuf->sb_nextnum);
1461			WARN_ON(ret); /* never fails */
1462		}
1463		if (atomic_read(&segbuf->sb_err) &&
1464		    segbuf->sb_segnum != nilfs->ns_nextnum)
1465			/* Case 2: extended segment (!= next) failed */
1466			nilfs_sufile_set_error(sufile, segbuf->sb_segnum);
1467		prev = segbuf;
1468	}
1469}
1470
1471static void nilfs_segctor_update_segusage(struct nilfs_sc_info *sci,
1472					  struct inode *sufile)
1473{
1474	struct nilfs_segment_buffer *segbuf;
1475	unsigned long live_blocks;
1476	int ret;
1477
1478	list_for_each_entry(segbuf, &sci->sc_segbufs, sb_list) {
1479		live_blocks = segbuf->sb_sum.nblocks +
1480			(segbuf->sb_pseg_start - segbuf->sb_fseg_start);
1481		ret = nilfs_sufile_set_segment_usage(sufile, segbuf->sb_segnum,
1482						     live_blocks,
1483						     sci->sc_seg_ctime);
1484		WARN_ON(ret); /* always succeeds because the segusage is dirty */
1485	}
1486}
1487
1488static void nilfs_cancel_segusage(struct list_head *logs, struct inode *sufile)
1489{
1490	struct nilfs_segment_buffer *segbuf;
1491	int ret;
1492
1493	segbuf = NILFS_FIRST_SEGBUF(logs);
1494	ret = nilfs_sufile_set_segment_usage(sufile, segbuf->sb_segnum,
1495					     segbuf->sb_pseg_start -
1496					     segbuf->sb_fseg_start, 0);
1497	WARN_ON(ret); /* always succeeds because the segusage is dirty */
1498
1499	list_for_each_entry_continue(segbuf, logs, sb_list) {
1500		ret = nilfs_sufile_set_segment_usage(sufile, segbuf->sb_segnum,
1501						     0, 0);
1502		WARN_ON(ret); /* always succeeds */
1503	}
1504}
1505
1506static void nilfs_segctor_truncate_segments(struct nilfs_sc_info *sci,
1507					    struct nilfs_segment_buffer *last,
1508					    struct inode *sufile)
1509{
1510	struct nilfs_segment_buffer *segbuf = last;
1511	int ret;
1512
1513	list_for_each_entry_continue(segbuf, &sci->sc_segbufs, sb_list) {
1514		sci->sc_segbuf_nblocks -= segbuf->sb_rest_blocks;
1515		ret = nilfs_sufile_free(sufile, segbuf->sb_nextnum);
1516		WARN_ON(ret);
1517	}
1518	nilfs_truncate_logs(&sci->sc_segbufs, last);
1519}
1520
1521
1522static int nilfs_segctor_collect(struct nilfs_sc_info *sci,
1523				 struct the_nilfs *nilfs, int mode)
1524{
1525	struct nilfs_cstage prev_stage = sci->sc_stage;
1526	int err, nadd = 1;
1527
1528	/* Collection retry loop */
1529	for (;;) {
1530		sci->sc_nblk_this_inc = 0;
1531		sci->sc_curseg = NILFS_FIRST_SEGBUF(&sci->sc_segbufs);
1532
1533		err = nilfs_segctor_reset_segment_buffer(sci);
1534		if (unlikely(err))
1535			goto failed;
1536
1537		err = nilfs_segctor_collect_blocks(sci, mode);
1538		sci->sc_nblk_this_inc += sci->sc_curseg->sb_sum.nblocks;
1539		if (!err)
1540			break;
1541
1542		if (unlikely(err != -E2BIG))
1543			goto failed;
1544
1545		/* The current segment is filled up */
1546		if (mode != SC_LSEG_SR ||
1547		    nilfs_sc_cstage_get(sci) < NILFS_ST_CPFILE)
1548			break;
1549
1550		nilfs_clear_logs(&sci->sc_segbufs);
1551
1552		if (sci->sc_stage.flags & NILFS_CF_SUFREED) {
1553			err = nilfs_sufile_cancel_freev(nilfs->ns_sufile,
1554							sci->sc_freesegs,
1555							sci->sc_nfreesegs,
1556							NULL);
1557			WARN_ON(err); /* does not happen */
1558			sci->sc_stage.flags &= ~NILFS_CF_SUFREED;
1559		}
1560
1561		err = nilfs_segctor_extend_segments(sci, nilfs, nadd);
1562		if (unlikely(err))
1563			return err;
1564
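		/*
		 * Grow the number of segments added per retry exponentially,
		 * capped at SC_MAX_SEGDELTA, and rewind the collection stage
		 * to its state at entry before redoing the collection.
		 */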
1565		nadd = min_t(int, nadd << 1, SC_MAX_SEGDELTA);
1566		sci->sc_stage = prev_stage;
1567	}
1568	nilfs_segctor_zeropad_segsum(sci);
1569	nilfs_segctor_truncate_segments(sci, sci->sc_curseg, nilfs->ns_sufile);
1570	return 0;
1571
1572 failed:
1573	return err;
1574}
1575
1576static void nilfs_list_replace_buffer(struct buffer_head *old_bh,
1577				      struct buffer_head *new_bh)
1578{
1579	BUG_ON(!list_empty(&new_bh->b_assoc_buffers));
1580
1581	list_replace_init(&old_bh->b_assoc_buffers, &new_bh->b_assoc_buffers);
1582	/* The caller must release old_bh */
1583}
1584
1585static int
1586nilfs_segctor_update_payload_blocknr(struct nilfs_sc_info *sci,
1587				     struct nilfs_segment_buffer *segbuf,
1588				     int mode)
1589{
1590	struct inode *inode = NULL;
1591	sector_t blocknr;
1592	unsigned long nfinfo = segbuf->sb_sum.nfinfo;
1593	unsigned long nblocks = 0, ndatablk = 0;
1594	const struct nilfs_sc_operations *sc_op = NULL;
1595	struct nilfs_segsum_pointer ssp;
1596	struct nilfs_finfo *finfo = NULL;
1597	union nilfs_binfo binfo;
1598	struct buffer_head *bh, *bh_org;
1599	ino_t ino = 0;
1600	int err = 0;
1601
1602	if (!nfinfo)
1603		goto out;
1604
1605	blocknr = segbuf->sb_pseg_start + segbuf->sb_sum.nsumblk;
1606	ssp.bh = NILFS_SEGBUF_FIRST_BH(&segbuf->sb_segsum_buffers);
1607	ssp.offset = sizeof(struct nilfs_segment_summary);
1608
1609	list_for_each_entry(bh, &segbuf->sb_payload_buffers, b_assoc_buffers) {
1610		if (bh == segbuf->sb_super_root)
1611			break;
1612		if (!finfo) {
1613			finfo =	nilfs_segctor_map_segsum_entry(
1614				sci, &ssp, sizeof(*finfo));
1615			ino = le64_to_cpu(finfo->fi_ino);
1616			nblocks = le32_to_cpu(finfo->fi_nblocks);
1617			ndatablk = le32_to_cpu(finfo->fi_ndatablk);
1618
1619			inode = bh->b_page->mapping->host;
1620
1621			if (mode == SC_LSEG_DSYNC)
1622				sc_op = &nilfs_sc_dsync_ops;
1623			else if (ino == NILFS_DAT_INO)
1624				sc_op = &nilfs_sc_dat_ops;
1625			else /* file blocks */
1626				sc_op = &nilfs_sc_file_ops;
1627		}
1628		bh_org = bh;
1629		get_bh(bh_org);
1630		err = nilfs_bmap_assign(NILFS_I(inode)->i_bmap, &bh, blocknr,
1631					&binfo);
1632		if (bh != bh_org)
1633			nilfs_list_replace_buffer(bh_org, bh);
1634		brelse(bh_org);
1635		if (unlikely(err))
1636			goto failed_bmap;
1637
1638		if (ndatablk > 0)
1639			sc_op->write_data_binfo(sci, &ssp, &binfo);
1640		else
1641			sc_op->write_node_binfo(sci, &ssp, &binfo);
1642
1643		blocknr++;
1644		if (--nblocks == 0) {
1645			finfo = NULL;
1646			if (--nfinfo == 0)
1647				break;
1648		} else if (ndatablk > 0)
1649			ndatablk--;
1650	}
1651 out:
1652	return 0;
1653
1654 failed_bmap:
1655	return err;
1656}
1657
1658static int nilfs_segctor_assign(struct nilfs_sc_info *sci, int mode)
1659{
1660	struct nilfs_segment_buffer *segbuf;
1661	int err;
1662
1663	list_for_each_entry(segbuf, &sci->sc_segbufs, sb_list) {
1664		err = nilfs_segctor_update_payload_blocknr(sci, segbuf, mode);
1665		if (unlikely(err))
1666			return err;
1667		nilfs_segbuf_fill_in_segsum(segbuf);
1668	}
1669	return 0;
1670}
1671
1672static void nilfs_begin_page_io(struct page *page)
1673{
1674	if (!page || PageWriteback(page))
1675		/*
1676		 * For split b-tree node pages, this function may be called
1677		 * twice.  We ignore the second and later calls with this check.
1678		 */
1679		return;
1680
1681	lock_page(page);
1682	clear_page_dirty_for_io(page);
1683	set_page_writeback(page);
1684	unlock_page(page);
1685}
1686
1687static void nilfs_segctor_prepare_write(struct nilfs_sc_info *sci)
1688{
1689	struct nilfs_segment_buffer *segbuf;
1690	struct page *bd_page = NULL, *fs_page = NULL;
1691
1692	list_for_each_entry(segbuf, &sci->sc_segbufs, sb_list) {
1693		struct buffer_head *bh;
1694
1695		list_for_each_entry(bh, &segbuf->sb_segsum_buffers,
1696				    b_assoc_buffers) {
1697			if (bh->b_page != bd_page) {
1698				if (bd_page) {
1699					lock_page(bd_page);
1700					clear_page_dirty_for_io(bd_page);
1701					set_page_writeback(bd_page);
1702					unlock_page(bd_page);
1703				}
1704				bd_page = bh->b_page;
1705			}
1706		}
1707
1708		list_for_each_entry(bh, &segbuf->sb_payload_buffers,
1709				    b_assoc_buffers) {
1710			if (bh == segbuf->sb_super_root) {
1711				if (bh->b_page != bd_page) {
1712					lock_page(bd_page);
1713					clear_page_dirty_for_io(bd_page);
1714					set_page_writeback(bd_page);
1715					unlock_page(bd_page);
1716					bd_page = bh->b_page;
1717				}
1718				break;
1719			}
1720			set_buffer_async_write(bh);
1721			if (bh->b_page != fs_page) {
1722				nilfs_begin_page_io(fs_page);
1723				fs_page = bh->b_page;
1724			}
1725		}
1726	}
1727	if (bd_page) {
1728		lock_page(bd_page);
1729		clear_page_dirty_for_io(bd_page);
1730		set_page_writeback(bd_page);
1731		unlock_page(bd_page);
1732	}
1733	nilfs_begin_page_io(fs_page);
1734}
1735
1736static int nilfs_segctor_write(struct nilfs_sc_info *sci,
1737			       struct the_nilfs *nilfs)
1738{
1739	int ret;
1740
1741	ret = nilfs_write_logs(&sci->sc_segbufs, nilfs);
1742	list_splice_tail_init(&sci->sc_segbufs, &sci->sc_write_logs);
1743	return ret;
1744}
1745
1746static void nilfs_end_page_io(struct page *page, int err)
1747{
1748	if (!page)
1749		return;
1750
1751	if (buffer_nilfs_node(page_buffers(page)) && !PageWriteback(page)) {
1752		/*
1753		 * For b-tree node pages, this function may be called twice
1754		 * or more because they might be split in a segment.
1755		 */
1756		if (PageDirty(page)) {
1757			/*
1758			 * For pages holding split b-tree node buffers, the dirty
1759			 * flag on the buffers may be cleared individually.
1760			 * In that case, the page is redirtied once for the
1761			 * remaining buffers, and that redirtying must be cancelled
1762			 * if all the buffers get cleaned later.
1763			 */
1764			lock_page(page);
1765			if (nilfs_page_buffers_clean(page))
1766				__nilfs_clear_page_dirty(page);
1767			unlock_page(page);
1768		}
1769		return;
1770	}
1771
1772	if (!err) {
1773		if (!nilfs_page_buffers_clean(page))
1774			__set_page_dirty_nobuffers(page);
1775		ClearPageError(page);
1776	} else {
1777		__set_page_dirty_nobuffers(page);
1778		SetPageError(page);
1779	}
1780
1781	end_page_writeback(page);
1782}
1783
1784static void nilfs_abort_logs(struct list_head *logs, int err)
1785{
1786	struct nilfs_segment_buffer *segbuf;
1787	struct page *bd_page = NULL, *fs_page = NULL;
1788	struct buffer_head *bh;
1789
1790	if (list_empty(logs))
1791		return;
1792
1793	list_for_each_entry(segbuf, logs, sb_list) {
1794		list_for_each_entry(bh, &segbuf->sb_segsum_buffers,
1795				    b_assoc_buffers) {
1796			clear_buffer_uptodate(bh);
1797			if (bh->b_page != bd_page) {
1798				if (bd_page)
1799					end_page_writeback(bd_page);
1800				bd_page = bh->b_page;
1801			}
1802		}
1803
1804		list_for_each_entry(bh, &segbuf->sb_payload_buffers,
1805				    b_assoc_buffers) {
1806			if (bh == segbuf->sb_super_root) {
1807				clear_buffer_uptodate(bh);
1808				if (bh->b_page != bd_page) {
1809					end_page_writeback(bd_page);
1810					bd_page = bh->b_page;
1811				}
1812				break;
1813			}
1814			clear_buffer_async_write(bh);
1815			if (bh->b_page != fs_page) {
1816				nilfs_end_page_io(fs_page, err);
1817				fs_page = bh->b_page;
1818			}
1819		}
1820	}
1821	if (bd_page)
1822		end_page_writeback(bd_page);
1823
1824	nilfs_end_page_io(fs_page, err);
1825}
1826
1827static void nilfs_segctor_abort_construction(struct nilfs_sc_info *sci,
1828					     struct the_nilfs *nilfs, int err)
1829{
1830	LIST_HEAD(logs);
1831	int ret;
1832
1833	list_splice_tail_init(&sci->sc_write_logs, &logs);
1834	ret = nilfs_wait_on_logs(&logs);
1835	nilfs_abort_logs(&logs, ret ? : err);
1836
1837	list_splice_tail_init(&sci->sc_segbufs, &logs);
1838	nilfs_cancel_segusage(&logs, nilfs->ns_sufile);
1839	nilfs_free_incomplete_logs(&logs, nilfs);
1840
1841	if (sci->sc_stage.flags & NILFS_CF_SUFREED) {
1842		ret = nilfs_sufile_cancel_freev(nilfs->ns_sufile,
1843						sci->sc_freesegs,
1844						sci->sc_nfreesegs,
1845						NULL);
1846		WARN_ON(ret); /* does not happen */
1847	}
1848
1849	nilfs_destroy_logs(&logs);
1850}
1851
1852static void nilfs_set_next_segment(struct the_nilfs *nilfs,
1853				   struct nilfs_segment_buffer *segbuf)
1854{
1855	nilfs->ns_segnum = segbuf->sb_segnum;
1856	nilfs->ns_nextnum = segbuf->sb_nextnum;
1857	nilfs->ns_pseg_offset = segbuf->sb_pseg_start - segbuf->sb_fseg_start
1858		+ segbuf->sb_sum.nblocks;
1859	nilfs->ns_seg_seq = segbuf->sb_sum.seg_seq;
1860	nilfs->ns_ctime = segbuf->sb_sum.ctime;
1861}
1862
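/*
 * nilfs_segctor_complete_write - finalize the logs written successfully.
 * Marks the written buffers up to date and clean, ends writeback of
 * the related pages, drops the collected inodes, records the position
 * of the next log, and, if a super root was written, updates the last
 * segment information and advances the checkpoint number.
 */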
1863static void nilfs_segctor_complete_write(struct nilfs_sc_info *sci)
1864{
1865	struct nilfs_segment_buffer *segbuf;
1866	struct page *bd_page = NULL, *fs_page = NULL;
1867	struct the_nilfs *nilfs = sci->sc_super->s_fs_info;
1868	int update_sr = false;
1869
1870	list_for_each_entry(segbuf, &sci->sc_write_logs, sb_list) {
1871		struct buffer_head *bh;
1872
1873		list_for_each_entry(bh, &segbuf->sb_segsum_buffers,
1874				    b_assoc_buffers) {
1875			set_buffer_uptodate(bh);
1876			clear_buffer_dirty(bh);
1877			if (bh->b_page != bd_page) {
1878				if (bd_page)
1879					end_page_writeback(bd_page);
1880				bd_page = bh->b_page;
1881			}
1882		}
1883		/*
1884		 * We assume that the buffers which belong to the same page
1885		 * are contiguous in the buffer list.
1886		 * Under this assumption, the last buffer of each page is
1887		 * identifiable by the discontinuity of bh->b_page
1888		 * (page != fs_page).
1889		 *
1890		 * For B-tree node blocks, however, this assumption is not
1891		 * guaranteed.  The cleanup code of B-tree node pages needs
1892		 * special care.
1893		 */
1894		list_for_each_entry(bh, &segbuf->sb_payload_buffers,
1895				    b_assoc_buffers) {
1896			const unsigned long set_bits = BIT(BH_Uptodate);
1897			const unsigned long clear_bits =
1898				(BIT(BH_Dirty) | BIT(BH_Async_Write) |
1899				 BIT(BH_Delay) | BIT(BH_NILFS_Volatile) |
1900				 BIT(BH_NILFS_Redirected));
1901
1902			if (bh == segbuf->sb_super_root) {
1903				set_buffer_uptodate(bh);
1904				clear_buffer_dirty(bh);
1905				if (bh->b_page != bd_page) {
1906					end_page_writeback(bd_page);
1907					bd_page = bh->b_page;
1908				}
1909				update_sr = true;
1910				break;
1911			}
1912			set_mask_bits(&bh->b_state, clear_bits, set_bits);
1913			if (bh->b_page != fs_page) {
1914				nilfs_end_page_io(fs_page, 0);
1915				fs_page = bh->b_page;
1916			}
1917		}
1918
1919		if (!nilfs_segbuf_simplex(segbuf)) {
1920			if (segbuf->sb_sum.flags & NILFS_SS_LOGBGN) {
1921				set_bit(NILFS_SC_UNCLOSED, &sci->sc_flags);
1922				sci->sc_lseg_stime = jiffies;
1923			}
1924			if (segbuf->sb_sum.flags & NILFS_SS_LOGEND)
1925				clear_bit(NILFS_SC_UNCLOSED, &sci->sc_flags);
1926		}
1927	}
1928	/*
1929	 * Since pages may continue over multiple segment buffers,
1930	 * the end of the last page must be checked outside of the loop.
1931	 */
1932	if (bd_page)
1933		end_page_writeback(bd_page);
1934
1935	nilfs_end_page_io(fs_page, 0);
1936
1937	nilfs_drop_collected_inodes(&sci->sc_dirty_files);
1938
1939	if (nilfs_doing_gc())
1940		nilfs_drop_collected_inodes(&sci->sc_gc_inodes);
1941	else
1942		nilfs->ns_nongc_ctime = sci->sc_seg_ctime;
1943
1944	sci->sc_nblk_inc += sci->sc_nblk_this_inc;
1945
1946	segbuf = NILFS_LAST_SEGBUF(&sci->sc_write_logs);
1947	nilfs_set_next_segment(nilfs, segbuf);
1948
1949	if (update_sr) {
1950		nilfs->ns_flushed_device = 0;
1951		nilfs_set_last_segment(nilfs, segbuf->sb_pseg_start,
1952				       segbuf->sb_sum.seg_seq, nilfs->ns_cno++);
1953
1954		clear_bit(NILFS_SC_HAVE_DELTA, &sci->sc_flags);
1955		clear_bit(NILFS_SC_DIRTY, &sci->sc_flags);
1956		set_bit(NILFS_SC_SUPER_ROOT, &sci->sc_flags);
1957		nilfs_segctor_clear_metadata_dirty(sci);
1958	} else
1959		clear_bit(NILFS_SC_SUPER_ROOT, &sci->sc_flags);
1960}
1961
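/*
 * nilfs_segctor_wait - wait for completion of the submitted logs and,
 * on success, finalize them and release the segment buffers.
 */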
1962static int nilfs_segctor_wait(struct nilfs_sc_info *sci)
1963{
1964	int ret;
1965
1966	ret = nilfs_wait_on_logs(&sci->sc_write_logs);
1967	if (!ret) {
1968		nilfs_segctor_complete_write(sci);
1969		nilfs_destroy_logs(&sci->sc_write_logs);
1970	}
1971	return ret;
1972}
1973
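/*
 * nilfs_segctor_collect_dirty_files - move dirty inodes to the
 * constructor.  For each inode queued on ns_dirty_files, make sure
 * its ifile inode block is available, redirty that block, and move
 * the inode to sc_dirty_files with the BUSY flag set.
 */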
1974static int nilfs_segctor_collect_dirty_files(struct nilfs_sc_info *sci,
1975					     struct the_nilfs *nilfs)
1976{
1977	struct nilfs_inode_info *ii, *n;
1978	struct inode *ifile = sci->sc_root->ifile;
1979
1980	spin_lock(&nilfs->ns_inode_lock);
1981 retry:
1982	list_for_each_entry_safe(ii, n, &nilfs->ns_dirty_files, i_dirty) {
1983		if (!ii->i_bh) {
1984			struct buffer_head *ibh;
1985			int err;
1986
1987			spin_unlock(&nilfs->ns_inode_lock);
1988			err = nilfs_ifile_get_inode_block(
1989				ifile, ii->vfs_inode.i_ino, &ibh);
1990			if (unlikely(err)) {
1991				nilfs_warn(sci->sc_super,
1992					   "log writer: error %d getting inode block (ino=%lu)",
1993					   err, ii->vfs_inode.i_ino);
1994				return err;
1995			}
1996			spin_lock(&nilfs->ns_inode_lock);
1997			if (likely(!ii->i_bh))
1998				ii->i_bh = ibh;
1999			else
2000				brelse(ibh);
2001			goto retry;
2002		}
2003
2004		/* Always redirty the buffer to avoid race condition */
2005		mark_buffer_dirty(ii->i_bh);
2006		nilfs_mdt_mark_dirty(ifile);
2007
2008		clear_bit(NILFS_I_QUEUED, &ii->i_state);
2009		set_bit(NILFS_I_BUSY, &ii->i_state);
2010		list_move_tail(&ii->i_dirty, &sci->sc_dirty_files);
2011	}
2012	spin_unlock(&nilfs->ns_inode_lock);
2013
2014	return 0;
2015}
2016
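/*
 * nilfs_segctor_drop_written_files - release inodes whose dirty state
 * has been fully written out.  Calls to iput() are deferred to the
 * sc_iput_work work item for unlinked inodes and while the mount is
 * not yet finished, to avoid deadlocks.
 */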
2017static void nilfs_segctor_drop_written_files(struct nilfs_sc_info *sci,
2018					     struct the_nilfs *nilfs)
2019{
2020	struct nilfs_inode_info *ii, *n;
2021	int during_mount = !(sci->sc_super->s_flags & SB_ACTIVE);
2022	int defer_iput = false;
2023
2024	spin_lock(&nilfs->ns_inode_lock);
2025	list_for_each_entry_safe(ii, n, &sci->sc_dirty_files, i_dirty) {
2026		if (!test_and_clear_bit(NILFS_I_UPDATED, &ii->i_state) ||
2027		    test_bit(NILFS_I_DIRTY, &ii->i_state))
2028			continue;
2029
2030		clear_bit(NILFS_I_BUSY, &ii->i_state);
2031		brelse(ii->i_bh);
2032		ii->i_bh = NULL;
2033		list_del_init(&ii->i_dirty);
2034		if (!ii->vfs_inode.i_nlink || during_mount) {
2035			/*
2036			 * Defer calling iput() to avoid deadlocks if
2037			 * i_nlink == 0 or mount is not yet finished.
2038			 */
2039			list_add_tail(&ii->i_dirty, &sci->sc_iput_queue);
2040			defer_iput = true;
2041		} else {
2042			spin_unlock(&nilfs->ns_inode_lock);
2043			iput(&ii->vfs_inode);
2044			spin_lock(&nilfs->ns_inode_lock);
2045		}
2046	}
2047	spin_unlock(&nilfs->ns_inode_lock);
2048
2049	if (defer_iput)
2050		schedule_work(&sci->sc_iput_work);
2051}
2052
2053/*
2054 * Main procedure of segment constructor
2055 */
2056static int nilfs_segctor_do_construct(struct nilfs_sc_info *sci, int mode)
2057{
2058	struct the_nilfs *nilfs = sci->sc_super->s_fs_info;
2059	int err;
2060
2061	if (sb_rdonly(sci->sc_super))
2062		return -EROFS;
2063
2064	nilfs_sc_cstage_set(sci, NILFS_ST_INIT);
2065	sci->sc_cno = nilfs->ns_cno;
2066
2067	err = nilfs_segctor_collect_dirty_files(sci, nilfs);
2068	if (unlikely(err))
2069		goto out;
2070
2071	if (nilfs_test_metadata_dirty(nilfs, sci->sc_root))
2072		set_bit(NILFS_SC_DIRTY, &sci->sc_flags);
2073
2074	if (nilfs_segctor_clean(sci))
2075		goto out;
2076
2077	do {
2078		sci->sc_stage.flags &= ~NILFS_CF_HISTORY_MASK;
2079
2080		err = nilfs_segctor_begin_construction(sci, nilfs);
2081		if (unlikely(err))
2082			goto out;
2083
2084		/* Update time stamp */
2085		sci->sc_seg_ctime = ktime_get_real_seconds();
2086
2087		err = nilfs_segctor_collect(sci, nilfs, mode);
2088		if (unlikely(err))
2089			goto failed;
2090
2091		/* Avoid empty segment */
2092		if (nilfs_sc_cstage_get(sci) == NILFS_ST_DONE &&
2093		    nilfs_segbuf_empty(sci->sc_curseg)) {
2094			nilfs_segctor_abort_construction(sci, nilfs, 1);
2095			goto out;
2096		}
2097
2098		err = nilfs_segctor_assign(sci, mode);
2099		if (unlikely(err))
2100			goto failed;
2101
2102		if (sci->sc_stage.flags & NILFS_CF_IFILE_STARTED)
2103			nilfs_segctor_fill_in_file_bmap(sci);
2104
2105		if (mode == SC_LSEG_SR &&
2106		    nilfs_sc_cstage_get(sci) >= NILFS_ST_CPFILE) {
2107			err = nilfs_segctor_fill_in_checkpoint(sci);
2108			if (unlikely(err))
2109				goto failed_to_write;
2110
2111			nilfs_segctor_fill_in_super_root(sci, nilfs);
2112		}
2113		nilfs_segctor_update_segusage(sci, nilfs->ns_sufile);
2114
2115		/* Write partial segments */
2116		nilfs_segctor_prepare_write(sci);
2117
2118		nilfs_add_checksums_on_logs(&sci->sc_segbufs,
2119					    nilfs->ns_crc_seed);
2120
2121		err = nilfs_segctor_write(sci, nilfs);
2122		if (unlikely(err))
2123			goto failed_to_write;
2124
2125		if (nilfs_sc_cstage_get(sci) == NILFS_ST_DONE ||
2126		    nilfs->ns_blocksize_bits != PAGE_SHIFT) {
2127			/*
2128			 * At this point, we avoid double buffering
2129			 * for blocksize < pagesize because page dirty
2130			 * flag is turned off during write and dirty
2131			 * buffers are not properly collected for
2132			 * pages crossing over segments.
2133			 */
2134			err = nilfs_segctor_wait(sci);
2135			if (err)
2136				goto failed_to_write;
2137		}
2138	} while (nilfs_sc_cstage_get(sci) != NILFS_ST_DONE);
2139
2140 out:
2141	nilfs_segctor_drop_written_files(sci, nilfs);
2142	return err;
2143
2144 failed_to_write:
2145	if (sci->sc_stage.flags & NILFS_CF_IFILE_STARTED)
2146		nilfs_redirty_inodes(&sci->sc_dirty_files);
2147
2148 failed:
2149	if (nilfs_doing_gc())
2150		nilfs_redirty_inodes(&sci->sc_gc_inodes);
2151	nilfs_segctor_abort_construction(sci, nilfs, err);
2152	goto out;
2153}
2154
2155/**
2156 * nilfs_segctor_start_timer - set timer of background write
2157 * @sci: nilfs_sc_info
2158 *
2159 * If the timer has already been set, it ignores the new request.
2160 * This function MUST be called within a section locking the segment
2161 * semaphore.
2162 */
2163static void nilfs_segctor_start_timer(struct nilfs_sc_info *sci)
2164{
2165	spin_lock(&sci->sc_state_lock);
2166	if (!(sci->sc_state & NILFS_SEGCTOR_COMMIT)) {
2167		sci->sc_timer.expires = jiffies + sci->sc_interval;
2168		add_timer(&sci->sc_timer);
2169		sci->sc_state |= NILFS_SEGCTOR_COMMIT;
2170	}
2171	spin_unlock(&sci->sc_state_lock);
2172}
2173
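/**
 * nilfs_segctor_do_flush - set a flush request bit and kick the daemon
 * @sci: segment constructor object
 * @bn: request bit to set (0 for data files, inode number for
 *      metadata files)
 *
 * The log writer thread is woken up only if no flush request was
 * pending before this call.
 */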
2174static void nilfs_segctor_do_flush(struct nilfs_sc_info *sci, int bn)
2175{
2176	spin_lock(&sci->sc_state_lock);
2177	if (!(sci->sc_flush_request & BIT(bn))) {
2178		unsigned long prev_req = sci->sc_flush_request;
2179
2180		sci->sc_flush_request |= BIT(bn);
2181		if (!prev_req)
2182			wake_up(&sci->sc_wait_daemon);
2183	}
2184	spin_unlock(&sci->sc_state_lock);
2185}
2186
2187/**
2188 * nilfs_flush_segment - trigger a segment construction for resource control
2189 * @sb: super block
2190 * @ino: inode number of the file to be flushed out.
2191 */
2192void nilfs_flush_segment(struct super_block *sb, ino_t ino)
2193{
2194	struct the_nilfs *nilfs = sb->s_fs_info;
2195	struct nilfs_sc_info *sci = nilfs->ns_writer;
2196
2197	if (!sci || nilfs_doing_construction())
2198		return;
2199	nilfs_segctor_do_flush(sci, NILFS_MDT_INODE(sb, ino) ? ino : 0);
2200					/* assign bit 0 to data files */
2201}
2202
2203struct nilfs_segctor_wait_request {
2204	wait_queue_entry_t	wq;
2205	__u32		seq;
2206	int		err;
2207	atomic_t	done;
2208};
2209
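/*
 * nilfs_segctor_sync - queue a construction request and wait until the
 * log writer thread completes it.  Returns the result reported by the
 * writer, or -ERESTARTSYS if the wait is interrupted by a signal.
 * nilfs_construct_segment() below is a typical caller.
 */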
2210static int nilfs_segctor_sync(struct nilfs_sc_info *sci)
2211{
2212	struct nilfs_segctor_wait_request wait_req;
2213	int err = 0;
2214
2215	spin_lock(&sci->sc_state_lock);
2216	init_wait(&wait_req.wq);
2217	wait_req.err = 0;
2218	atomic_set(&wait_req.done, 0);
2219	wait_req.seq = ++sci->sc_seq_request;
2220	spin_unlock(&sci->sc_state_lock);
2221
2222	init_waitqueue_entry(&wait_req.wq, current);
2223	add_wait_queue(&sci->sc_wait_request, &wait_req.wq);
2224	set_current_state(TASK_INTERRUPTIBLE);
2225	wake_up(&sci->sc_wait_daemon);
2226
2227	for (;;) {
2228		if (atomic_read(&wait_req.done)) {
2229			err = wait_req.err;
2230			break;
2231		}
2232		if (!signal_pending(current)) {
2233			schedule();
2234			continue;
2235		}
2236		err = -ERESTARTSYS;
2237		break;
2238	}
2239	finish_wait(&sci->sc_wait_request, &wait_req.wq);
2240	return err;
2241}
2242
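/*
 * nilfs_segctor_wakeup - wake up the waiters whose requests have been
 * handled, i.e. whose sequence number does not exceed sc_seq_done,
 * recording @err as the result of their requests.
 */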
2243static void nilfs_segctor_wakeup(struct nilfs_sc_info *sci, int err)
2244{
2245	struct nilfs_segctor_wait_request *wrq, *n;
2246	unsigned long flags;
2247
2248	spin_lock_irqsave(&sci->sc_wait_request.lock, flags);
2249	list_for_each_entry_safe(wrq, n, &sci->sc_wait_request.head, wq.entry) {
2250		if (!atomic_read(&wrq->done) &&
2251		    nilfs_cnt32_ge(sci->sc_seq_done, wrq->seq)) {
2252			wrq->err = err;
2253			atomic_set(&wrq->done, 1);
2254		}
2255		if (atomic_read(&wrq->done)) {
2256			wrq->wq.func(&wrq->wq,
2257				     TASK_UNINTERRUPTIBLE | TASK_INTERRUPTIBLE,
2258				     0, NULL);
2259		}
2260	}
2261	spin_unlock_irqrestore(&sci->sc_wait_request.lock, flags);
2262}
2263
2264/**
2265 * nilfs_construct_segment - construct a logical segment
2266 * @sb: super block
2267 *
2268 * Return Value: On success, 0 is returned. On errors, one of the following
2269 * negative error codes is returned.
2270 *
2271 * %-EROFS - Read only filesystem.
2272 *
2273 * %-EIO - I/O error
2274 *
2275 * %-ENOSPC - No space left on device (only in a panic state).
2276 *
2277 * %-ERESTARTSYS - Interrupted.
2278 *
2279 * %-ENOMEM - Insufficient memory available.
2280 */
2281int nilfs_construct_segment(struct super_block *sb)
2282{
2283	struct the_nilfs *nilfs = sb->s_fs_info;
2284	struct nilfs_sc_info *sci = nilfs->ns_writer;
2285	struct nilfs_transaction_info *ti;
2286	int err;
2287
2288	if (sb_rdonly(sb) || unlikely(!sci))
2289		return -EROFS;
2290
2291	/* A call inside transactions causes a deadlock. */
2292	BUG_ON((ti = current->journal_info) && ti->ti_magic == NILFS_TI_MAGIC);
2293
2294	err = nilfs_segctor_sync(sci);
2295	return err;
2296}
2297
2298/**
2299 * nilfs_construct_dsync_segment - construct a data-only logical segment
2300 * @sb: super block
2301 * @inode: inode whose data blocks should be written out
2302 * @start: start byte offset
2303 * @end: end byte offset (inclusive)
2304 *
2305 * Return Value: On success, 0 is returned. On errors, one of the following
2306 * negative error codes is returned.
2307 *
2308 * %-EROFS - Read only filesystem.
2309 *
2310 * %-EIO - I/O error
2311 *
2312 * %-ENOSPC - No space left on device (only in a panic state).
2313 *
2314 * %-ERESTARTSYS - Interrupted.
2315 *
2316 * %-ENOMEM - Insufficient memory available.
2317 */
2318int nilfs_construct_dsync_segment(struct super_block *sb, struct inode *inode,
2319				  loff_t start, loff_t end)
2320{
2321	struct the_nilfs *nilfs = sb->s_fs_info;
2322	struct nilfs_sc_info *sci = nilfs->ns_writer;
2323	struct nilfs_inode_info *ii;
2324	struct nilfs_transaction_info ti;
2325	int err = 0;
2326
2327	if (sb_rdonly(sb) || unlikely(!sci))
2328		return -EROFS;
2329
2330	nilfs_transaction_lock(sb, &ti, 0);
2331
2332	ii = NILFS_I(inode);
2333	if (test_bit(NILFS_I_INODE_SYNC, &ii->i_state) ||
2334	    nilfs_test_opt(nilfs, STRICT_ORDER) ||
2335	    test_bit(NILFS_SC_UNCLOSED, &sci->sc_flags) ||
2336	    nilfs_discontinued(nilfs)) {
2337		nilfs_transaction_unlock(sb);
2338		err = nilfs_segctor_sync(sci);
2339		return err;
2340	}
2341
2342	spin_lock(&nilfs->ns_inode_lock);
2343	if (!test_bit(NILFS_I_QUEUED, &ii->i_state) &&
2344	    !test_bit(NILFS_I_BUSY, &ii->i_state)) {
2345		spin_unlock(&nilfs->ns_inode_lock);
2346		nilfs_transaction_unlock(sb);
2347		return 0;
2348	}
2349	spin_unlock(&nilfs->ns_inode_lock);
2350	sci->sc_dsync_inode = ii;
2351	sci->sc_dsync_start = start;
2352	sci->sc_dsync_end = end;
2353
2354	err = nilfs_segctor_do_construct(sci, SC_LSEG_DSYNC);
2355	if (!err)
2356		nilfs->ns_flushed_device = 0;
2357
2358	nilfs_transaction_unlock(sb);
2359	return err;
2360}
2361
2362#define FLUSH_FILE_BIT	(0x1) /* data file only */
2363#define FLUSH_DAT_BIT	BIT(NILFS_DAT_INO) /* DAT only */
2364
2365/**
2366 * nilfs_segctor_accept - record accepted sequence count of log-write requests
2367 * @sci: segment constructor object
2368 */
2369static void nilfs_segctor_accept(struct nilfs_sc_info *sci)
2370{
2371	spin_lock(&sci->sc_state_lock);
2372	sci->sc_seq_accepted = sci->sc_seq_request;
2373	spin_unlock(&sci->sc_state_lock);
2374	del_timer_sync(&sci->sc_timer);
2375}
2376
2377/**
2378 * nilfs_segctor_notify - notify the result of request to caller threads
2379 * @sci: segment constructor object
2380 * @mode: mode of log forming
2381 * @err: error code to be notified
2382 */
2383static void nilfs_segctor_notify(struct nilfs_sc_info *sci, int mode, int err)
2384{
2385	/* Clear requests (even when the construction failed) */
2386	spin_lock(&sci->sc_state_lock);
2387
2388	if (mode == SC_LSEG_SR) {
2389		sci->sc_state &= ~NILFS_SEGCTOR_COMMIT;
2390		sci->sc_seq_done = sci->sc_seq_accepted;
2391		nilfs_segctor_wakeup(sci, err);
2392		sci->sc_flush_request = 0;
2393	} else {
2394		if (mode == SC_FLUSH_FILE)
2395			sci->sc_flush_request &= ~FLUSH_FILE_BIT;
2396		else if (mode == SC_FLUSH_DAT)
2397			sci->sc_flush_request &= ~FLUSH_DAT_BIT;
2398
2399		/* re-enable timer if checkpoint creation was not done */
2400		if ((sci->sc_state & NILFS_SEGCTOR_COMMIT) &&
2401		    time_before(jiffies, sci->sc_timer.expires))
2402			add_timer(&sci->sc_timer);
2403	}
2404	spin_unlock(&sci->sc_state_lock);
2405}
2406
2407/**
2408 * nilfs_segctor_construct - form logs and write them to disk
2409 * @sci: segment constructor object
2410 * @mode: mode of log forming
2411 */
2412static int nilfs_segctor_construct(struct nilfs_sc_info *sci, int mode)
2413{
2414	struct the_nilfs *nilfs = sci->sc_super->s_fs_info;
2415	struct nilfs_super_block **sbp;
2416	int err = 0;
2417
2418	nilfs_segctor_accept(sci);
2419
2420	if (nilfs_discontinued(nilfs))
2421		mode = SC_LSEG_SR;
2422	if (!nilfs_segctor_confirm(sci))
2423		err = nilfs_segctor_do_construct(sci, mode);
2424
2425	if (likely(!err)) {
2426		if (mode != SC_FLUSH_DAT)
2427			atomic_set(&nilfs->ns_ndirtyblks, 0);
2428		if (test_bit(NILFS_SC_SUPER_ROOT, &sci->sc_flags) &&
2429		    nilfs_discontinued(nilfs)) {
2430			down_write(&nilfs->ns_sem);
2431			err = -EIO;
2432			sbp = nilfs_prepare_super(sci->sc_super,
2433						  nilfs_sb_will_flip(nilfs));
2434			if (likely(sbp)) {
2435				nilfs_set_log_cursor(sbp[0], nilfs);
2436				err = nilfs_commit_super(sci->sc_super,
2437							 NILFS_SB_COMMIT);
2438			}
2439			up_write(&nilfs->ns_sem);
2440		}
2441	}
2442
2443	nilfs_segctor_notify(sci, mode, err);
2444	return err;
2445}
2446
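/*
 * nilfs_construction_timeout - timer callback waking up the log writer
 * thread when the construction interval has elapsed.
 */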
2447static void nilfs_construction_timeout(struct timer_list *t)
2448{
2449	struct nilfs_sc_info *sci = from_timer(sci, t, sc_timer);
2450
2451	wake_up_process(sci->sc_timer_task);
2452}
2453
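/*
 * nilfs_remove_written_gcinodes - release the GC inodes on @head whose
 * blocks have been written out, truncating their page and b-tree node
 * caches and dropping their references.
 */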
2454static void
2455nilfs_remove_written_gcinodes(struct the_nilfs *nilfs, struct list_head *head)
2456{
2457	struct nilfs_inode_info *ii, *n;
2458
2459	list_for_each_entry_safe(ii, n, head, i_dirty) {
2460		if (!test_bit(NILFS_I_UPDATED, &ii->i_state))
2461			continue;
2462		list_del_init(&ii->i_dirty);
2463		truncate_inode_pages(&ii->vfs_inode.i_data, 0);
2464		nilfs_btnode_cache_clear(ii->i_assoc_inode->i_mapping);
2465		iput(&ii->vfs_inode);
2466	}
2467}
2468
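/*
 * nilfs_clean_segments - construct logs for garbage collection.
 * Saves the DAT state to its shadow map, writes out the blocks moved
 * by the cleaner (retrying until the construction succeeds), and
 * optionally issues discard requests for the freed segments.
 * kbufs[4] holds the array of segment numbers to be freed.
 */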
2469int nilfs_clean_segments(struct super_block *sb, struct nilfs_argv *argv,
2470			 void **kbufs)
2471{
2472	struct the_nilfs *nilfs = sb->s_fs_info;
2473	struct nilfs_sc_info *sci = nilfs->ns_writer;
2474	struct nilfs_transaction_info ti;
2475	int err;
2476
2477	if (unlikely(!sci))
2478		return -EROFS;
2479
2480	nilfs_transaction_lock(sb, &ti, 1);
2481
2482	err = nilfs_mdt_save_to_shadow_map(nilfs->ns_dat);
2483	if (unlikely(err))
2484		goto out_unlock;
2485
2486	err = nilfs_ioctl_prepare_clean_segments(nilfs, argv, kbufs);
2487	if (unlikely(err)) {
2488		nilfs_mdt_restore_from_shadow_map(nilfs->ns_dat);
2489		goto out_unlock;
2490	}
2491
2492	sci->sc_freesegs = kbufs[4];
2493	sci->sc_nfreesegs = argv[4].v_nmembs;
2494	list_splice_tail_init(&nilfs->ns_gc_inodes, &sci->sc_gc_inodes);
2495
2496	for (;;) {
2497		err = nilfs_segctor_construct(sci, SC_LSEG_SR);
2498		nilfs_remove_written_gcinodes(nilfs, &sci->sc_gc_inodes);
2499
2500		if (likely(!err))
2501			break;
2502
2503		nilfs_warn(sb, "error %d cleaning segments", err);
2504		set_current_state(TASK_INTERRUPTIBLE);
2505		schedule_timeout(sci->sc_interval);
2506	}
2507	if (nilfs_test_opt(nilfs, DISCARD)) {
2508		int ret = nilfs_discard_segments(nilfs, sci->sc_freesegs,
2509						 sci->sc_nfreesegs);
2510		if (ret) {
2511			nilfs_warn(sb,
2512				   "error %d on discard request, turning discards off for the device",
2513				   ret);
2514			nilfs_clear_opt(nilfs, DISCARD);
2515		}
2516	}
2517
2518 out_unlock:
2519	sci->sc_freesegs = NULL;
2520	sci->sc_nfreesegs = 0;
2521	nilfs_mdt_clear_shadow_map(nilfs->ns_dat);
2522	nilfs_transaction_unlock(sb);
2523	return err;
2524}
2525
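/*
 * nilfs_segctor_thread_construct - perform one construction pass under
 * the transaction lock on behalf of the log writer thread.
 */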
2526static void nilfs_segctor_thread_construct(struct nilfs_sc_info *sci, int mode)
2527{
2528	struct nilfs_transaction_info ti;
2529
2530	nilfs_transaction_lock(sci->sc_super, &ti, 0);
2531	nilfs_segctor_construct(sci, mode);
2532
2533	/*
2534	 * An unclosed segment should be retried.  We do this using sc_timer.
2535	 * When sc_timer expires, a complete construction is triggered, which
2536	 * closes the current logical segment.
2537	 */
2538	if (test_bit(NILFS_SC_UNCLOSED, &sci->sc_flags))
2539		nilfs_segctor_start_timer(sci);
2540
2541	nilfs_transaction_unlock(sci->sc_super);
2542}
2543
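/*
 * nilfs_segctor_do_immediate_flush - carry out a pending flush request
 * right away: flush the DAT if its request bit is set, otherwise flush
 * the data files, then clear the corresponding request bit and the
 * PRIOR_FLUSH flag.
 */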
2544static void nilfs_segctor_do_immediate_flush(struct nilfs_sc_info *sci)
2545{
2546	int mode = 0;
2547
2548	spin_lock(&sci->sc_state_lock);
2549	mode = (sci->sc_flush_request & FLUSH_DAT_BIT) ?
2550		SC_FLUSH_DAT : SC_FLUSH_FILE;
2551	spin_unlock(&sci->sc_state_lock);
2552
2553	if (mode) {
2554		nilfs_segctor_do_construct(sci, mode);
2555
2556		spin_lock(&sci->sc_state_lock);
2557		sci->sc_flush_request &= (mode == SC_FLUSH_FILE) ?
2558			~FLUSH_FILE_BIT : ~FLUSH_DAT_BIT;
2559		spin_unlock(&sci->sc_state_lock);
2560	}
2561	clear_bit(NILFS_SC_PRIOR_FLUSH, &sci->sc_flags);
2562}
2563
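/*
 * nilfs_segctor_flush_mode - choose a construction mode for pending
 * flush requests.  A full construction with a super root (SC_LSEG_SR)
 * is chosen when requests for both data files and the DAT are pending,
 * or when the current logical segment has been left open longer than
 * sc_mjcp_freq.
 */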
2564static int nilfs_segctor_flush_mode(struct nilfs_sc_info *sci)
2565{
2566	if (!test_bit(NILFS_SC_UNCLOSED, &sci->sc_flags) ||
2567	    time_before(jiffies, sci->sc_lseg_stime + sci->sc_mjcp_freq)) {
2568		if (!(sci->sc_flush_request & ~FLUSH_FILE_BIT))
2569			return SC_FLUSH_FILE;
2570		else if (!(sci->sc_flush_request & ~FLUSH_DAT_BIT))
2571			return SC_FLUSH_DAT;
2572	}
2573	return SC_LSEG_SR;
2574}
2575
2576/**
2577 * nilfs_segctor_thread - main loop of the segment constructor thread.
2578 * @arg: pointer to a struct nilfs_sc_info.
2579 *
2580 * nilfs_segctor_thread() initializes a timer and serves as a daemon
2581 * to execute segment constructions.
2582 */
2583static int nilfs_segctor_thread(void *arg)
2584{
2585	struct nilfs_sc_info *sci = (struct nilfs_sc_info *)arg;
2586	struct the_nilfs *nilfs = sci->sc_super->s_fs_info;
2587	int timeout = 0;
2588
2589	sci->sc_timer_task = current;
2590
2591	/* start sync. */
2592	sci->sc_task = current;
2593	wake_up(&sci->sc_wait_task); /* for nilfs_segctor_start_thread() */
2594	nilfs_info(sci->sc_super,
2595		   "segctord starting. Construction interval = %lu seconds, CP frequency < %lu seconds",
2596		   sci->sc_interval / HZ, sci->sc_mjcp_freq / HZ);
2597
2598	spin_lock(&sci->sc_state_lock);
2599 loop:
2600	for (;;) {
2601		int mode;
2602
2603		if (sci->sc_state & NILFS_SEGCTOR_QUIT)
2604			goto end_thread;
2605
2606		if (timeout || sci->sc_seq_request != sci->sc_seq_done)
2607			mode = SC_LSEG_SR;
2608		else if (sci->sc_flush_request)
2609			mode = nilfs_segctor_flush_mode(sci);
2610		else
2611			break;
2612
2613		spin_unlock(&sci->sc_state_lock);
2614		nilfs_segctor_thread_construct(sci, mode);
2615		spin_lock(&sci->sc_state_lock);
2616		timeout = 0;
2617	}
2618
2619
2620	if (freezing(current)) {
2621		spin_unlock(&sci->sc_state_lock);
2622		try_to_freeze();
2623		spin_lock(&sci->sc_state_lock);
2624	} else {
2625		DEFINE_WAIT(wait);
2626		int should_sleep = 1;
2627
2628		prepare_to_wait(&sci->sc_wait_daemon, &wait,
2629				TASK_INTERRUPTIBLE);
2630
2631		if (sci->sc_seq_request != sci->sc_seq_done)
2632			should_sleep = 0;
2633		else if (sci->sc_flush_request)
2634			should_sleep = 0;
2635		else if (sci->sc_state & NILFS_SEGCTOR_COMMIT)
2636			should_sleep = time_before(jiffies,
2637					sci->sc_timer.expires);
2638
2639		if (should_sleep) {
2640			spin_unlock(&sci->sc_state_lock);
2641			schedule();
2642			spin_lock(&sci->sc_state_lock);
2643		}
2644		finish_wait(&sci->sc_wait_daemon, &wait);
2645		timeout = ((sci->sc_state & NILFS_SEGCTOR_COMMIT) &&
2646			   time_after_eq(jiffies, sci->sc_timer.expires));
2647
2648		if (nilfs_sb_dirty(nilfs) && nilfs_sb_need_update(nilfs))
2649			set_nilfs_discontinued(nilfs);
2650	}
2651	goto loop;
2652
2653 end_thread:
2654	/* end sync. */
2655	sci->sc_task = NULL;
2656	wake_up(&sci->sc_wait_task); /* for nilfs_segctor_kill_thread() */
2657	spin_unlock(&sci->sc_state_lock);
2658	return 0;
2659}
2660
2661static int nilfs_segctor_start_thread(struct nilfs_sc_info *sci)
2662{
2663	struct task_struct *t;
2664
2665	t = kthread_run(nilfs_segctor_thread, sci, "segctord");
2666	if (IS_ERR(t)) {
2667		int err = PTR_ERR(t);
2668
2669		nilfs_err(sci->sc_super, "error %d creating segctord thread",
2670			  err);
2671		return err;
2672	}
2673	wait_event(sci->sc_wait_task, sci->sc_task != NULL);
2674	return 0;
2675}
2676
2677static void nilfs_segctor_kill_thread(struct nilfs_sc_info *sci)
2678	__acquires(&sci->sc_state_lock)
2679	__releases(&sci->sc_state_lock)
2680{
2681	sci->sc_state |= NILFS_SEGCTOR_QUIT;
2682
2683	while (sci->sc_task) {
2684		wake_up(&sci->sc_wait_daemon);
2685		spin_unlock(&sci->sc_state_lock);
2686		wait_event(sci->sc_wait_task, sci->sc_task == NULL);
2687		spin_lock(&sci->sc_state_lock);
2688	}
2689}
2690
2691/*
2692 * Setup & clean-up functions
2693 */
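/*
 * nilfs_segctor_new - allocate and initialize a segment constructor
 * object for the tree rooted at @root, applying the construction
 * interval and watermark configured in the_nilfs when they are set.
 */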
2694static struct nilfs_sc_info *nilfs_segctor_new(struct super_block *sb,
2695					       struct nilfs_root *root)
2696{
2697	struct the_nilfs *nilfs = sb->s_fs_info;
2698	struct nilfs_sc_info *sci;
2699
2700	sci = kzalloc(sizeof(*sci), GFP_KERNEL);
2701	if (!sci)
2702		return NULL;
2703
2704	sci->sc_super = sb;
2705
2706	nilfs_get_root(root);
2707	sci->sc_root = root;
2708
2709	init_waitqueue_head(&sci->sc_wait_request);
2710	init_waitqueue_head(&sci->sc_wait_daemon);
2711	init_waitqueue_head(&sci->sc_wait_task);
2712	spin_lock_init(&sci->sc_state_lock);
2713	INIT_LIST_HEAD(&sci->sc_dirty_files);
2714	INIT_LIST_HEAD(&sci->sc_segbufs);
2715	INIT_LIST_HEAD(&sci->sc_write_logs);
2716	INIT_LIST_HEAD(&sci->sc_gc_inodes);
2717	INIT_LIST_HEAD(&sci->sc_iput_queue);
2718	INIT_WORK(&sci->sc_iput_work, nilfs_iput_work_func);
2719	timer_setup(&sci->sc_timer, nilfs_construction_timeout, 0);
2720
2721	sci->sc_interval = HZ * NILFS_SC_DEFAULT_TIMEOUT;
2722	sci->sc_mjcp_freq = HZ * NILFS_SC_DEFAULT_SR_FREQ;
2723	sci->sc_watermark = NILFS_SC_DEFAULT_WATERMARK;
2724
2725	if (nilfs->ns_interval)
2726		sci->sc_interval = HZ * nilfs->ns_interval;
2727	if (nilfs->ns_watermark)
2728		sci->sc_watermark = nilfs->ns_watermark;
2729	return sci;
2730}
2731
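/*
 * nilfs_segctor_write_out - flush out the remaining dirty state at
 * shutdown by repeating full constructions, retrying a limited number
 * of times unless the filesystem has become read-only.
 */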
2732static void nilfs_segctor_write_out(struct nilfs_sc_info *sci)
2733{
2734	int ret, retrycount = NILFS_SC_CLEANUP_RETRY;
2735
2736	/*
2737	 * The segctord thread was stopped and its timer was removed.
2738	 * But some tasks remain.
2739	 */
2740	do {
2741		struct nilfs_transaction_info ti;
2742
2743		nilfs_transaction_lock(sci->sc_super, &ti, 0);
2744		ret = nilfs_segctor_construct(sci, SC_LSEG_SR);
2745		nilfs_transaction_unlock(sci->sc_super);
2746
2747		flush_work(&sci->sc_iput_work);
2748
2749	} while (ret && ret != -EROFS && retrycount-- > 0);
2750}
2751
2752/**
2753 * nilfs_segctor_destroy - destroy the segment constructor.
2754 * @sci: nilfs_sc_info
2755 *
2756 * nilfs_segctor_destroy() kills the segctord thread and frees
2757 * the nilfs_sc_info struct.
2758 * Caller must hold the segment semaphore.
2759 */
2760static void nilfs_segctor_destroy(struct nilfs_sc_info *sci)
2761{
2762	struct the_nilfs *nilfs = sci->sc_super->s_fs_info;
2763	int flag;
2764
2765	up_write(&nilfs->ns_segctor_sem);
2766
2767	spin_lock(&sci->sc_state_lock);
2768	nilfs_segctor_kill_thread(sci);
2769	flag = ((sci->sc_state & NILFS_SEGCTOR_COMMIT) || sci->sc_flush_request
2770		|| sci->sc_seq_request != sci->sc_seq_done);
2771	spin_unlock(&sci->sc_state_lock);
2772
2773	if (flush_work(&sci->sc_iput_work))
2774		flag = true;
2775
2776	if (flag || !nilfs_segctor_confirm(sci))
2777		nilfs_segctor_write_out(sci);
2778
2779	if (!list_empty(&sci->sc_dirty_files)) {
2780		nilfs_warn(sci->sc_super,
2781			   "disposed unprocessed dirty file(s) when stopping log writer");
2782		nilfs_dispose_list(nilfs, &sci->sc_dirty_files, 1);
2783	}
2784
2785	if (!list_empty(&sci->sc_iput_queue)) {
2786		nilfs_warn(sci->sc_super,
2787			   "disposed unprocessed inode(s) in iput queue when stopping log writer");
2788		nilfs_dispose_list(nilfs, &sci->sc_iput_queue, 1);
2789	}
2790
2791	WARN_ON(!list_empty(&sci->sc_segbufs));
2792	WARN_ON(!list_empty(&sci->sc_write_logs));
2793
2794	nilfs_put_root(sci->sc_root);
2795
2796	down_write(&nilfs->ns_segctor_sem);
2797
2798	del_timer_sync(&sci->sc_timer);
2799	kfree(sci);
2800}
2801
2802/**
2803 * nilfs_attach_log_writer - attach log writer
2804 * @sb: super block instance
2805 * @root: root object of the current filesystem tree
2806 *
2807 * This allocates a log writer object, initializes it, and starts the
2808 * log writer.
2809 *
2810 * Return Value: On success, 0 is returned. On error, one of the following
2811 * negative error codes is returned.
2812 *
2813 * %-ENOMEM - Insufficient memory available.
2814 */
2815int nilfs_attach_log_writer(struct super_block *sb, struct nilfs_root *root)
2816{
2817	struct the_nilfs *nilfs = sb->s_fs_info;
2818	int err;
2819
2820	if (nilfs->ns_writer) {
2821		/*
2822		 * This happens if the filesystem is made read-only by
2823		 * __nilfs_error or nilfs_remount and then remounted
2824		 * read/write.  In these cases, reuse the existing
2825		 * writer.
2826		 */
2827		return 0;
2828	}
2829
2830	nilfs->ns_writer = nilfs_segctor_new(sb, root);
2831	if (!nilfs->ns_writer)
2832		return -ENOMEM;
2833
2834	inode_attach_wb(nilfs->ns_bdev->bd_inode, NULL);
2835
2836	err = nilfs_segctor_start_thread(nilfs->ns_writer);
2837	if (unlikely(err))
2838		nilfs_detach_log_writer(sb);
2839
2840	return err;
2841}
2842
2843/**
2844 * nilfs_detach_log_writer - destroy log writer
2845 * @sb: super block instance
2846 *
2847 * This kills the log writer daemon, frees the log writer object, and
2848 * destroys the list of dirty files.
2849 */
2850void nilfs_detach_log_writer(struct super_block *sb)
2851{
2852	struct the_nilfs *nilfs = sb->s_fs_info;
2853	LIST_HEAD(garbage_list);
2854
2855	down_write(&nilfs->ns_segctor_sem);
2856	if (nilfs->ns_writer) {
2857		nilfs_segctor_destroy(nilfs->ns_writer);
2858		nilfs->ns_writer = NULL;
2859	}
2860	set_nilfs_purging(nilfs);
2861
2862	/* Forcibly free the list of dirty files */
2863	spin_lock(&nilfs->ns_inode_lock);
2864	if (!list_empty(&nilfs->ns_dirty_files)) {
2865		list_splice_init(&nilfs->ns_dirty_files, &garbage_list);
2866		nilfs_warn(sb,
2867			   "disposed unprocessed dirty file(s) when detaching log writer");
2868	}
2869	spin_unlock(&nilfs->ns_inode_lock);
2870	up_write(&nilfs->ns_segctor_sem);
2871
2872	nilfs_dispose_list(nilfs, &garbage_list, 1);
2873	clear_nilfs_purging(nilfs);
2874}
2875