// SPDX-License-Identifier: GPL-2.0-only
/*
 * Copyright (C) Sistina Software, Inc.  1997-2003 All rights reserved.
 * Copyright (C) 2004-2007 Red Hat, Inc.  All rights reserved.
 */

#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt

#include <linux/bio.h>
#include <linux/sched/signal.h>
#include <linux/slab.h>
#include <linux/spinlock.h>
#include <linux/completion.h>
#include <linux/buffer_head.h>
#include <linux/statfs.h>
#include <linux/seq_file.h>
#include <linux/mount.h>
#include <linux/kthread.h>
#include <linux/delay.h>
#include <linux/gfs2_ondisk.h>
#include <linux/crc32.h>
#include <linux/time.h>
#include <linux/wait.h>
#include <linux/writeback.h>
#include <linux/backing-dev.h>
#include <linux/kernel.h>

#include "gfs2.h"
#include "incore.h"
#include "bmap.h"
#include "dir.h"
#include "glock.h"
#include "glops.h"
#include "inode.h"
#include "log.h"
#include "meta_io.h"
#include "quota.h"
#include "recovery.h"
#include "rgrp.h"
#include "super.h"
#include "trans.h"
#include "util.h"
#include "sys.h"
#include "xattr.h"
#include "lops.h"

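/*
 * Verdict from evict_should_delete(): gfs2_evict_inode() dispatches on
 * this to deallocate the dinode, merely evict the in-core inode, or
 * defer the whole eviction.
 */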
enum dinode_demise {
	SHOULD_DELETE_DINODE,
	SHOULD_NOT_DELETE_DINODE,
	SHOULD_DEFER_EVICTION,
};

/**
 * gfs2_jindex_free - Clear all the journal index information
 * @sdp: The GFS2 superblock
 *
 */

void gfs2_jindex_free(struct gfs2_sbd *sdp)
{
	struct list_head list;
	struct gfs2_jdesc *jd;

	spin_lock(&sdp->sd_jindex_spin);
	list_add(&list, &sdp->sd_jindex_list);
	list_del_init(&sdp->sd_jindex_list);
	sdp->sd_journals = 0;
	spin_unlock(&sdp->sd_jindex_spin);

	sdp->sd_jdesc = NULL;
	while (!list_empty(&list)) {
		jd = list_first_entry(&list, struct gfs2_jdesc, jd_list);
		gfs2_free_journal_extents(jd);
		list_del(&jd->jd_list);
		iput(jd->jd_inode);
		jd->jd_inode = NULL;
		kfree(jd);
	}
}

static struct gfs2_jdesc *jdesc_find_i(struct list_head *head, unsigned int jid)
{
	struct gfs2_jdesc *jd;
	int found = 0;

	list_for_each_entry(jd, head, jd_list) {
		if (jd->jd_jid == jid) {
			found = 1;
			break;
		}
	}

	if (!found)
		jd = NULL;

	return jd;
}

struct gfs2_jdesc *gfs2_jdesc_find(struct gfs2_sbd *sdp, unsigned int jid)
{
	struct gfs2_jdesc *jd;

	spin_lock(&sdp->sd_jindex_spin);
	jd = jdesc_find_i(&sdp->sd_jindex_list, jid);
	spin_unlock(&sdp->sd_jindex_spin);

	return jd;
}

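/*
 * Sanity-check a journal descriptor, typically after looking it up by
 * journal id, e.g. (hypothetical caller, for illustration only):
 *
 *	jd = gfs2_jdesc_find(sdp, jid);
 *	if (jd)
 *		error = gfs2_jdesc_check(jd);
 *
 * The journal must be between 8 MiB (8 << 20) and 1 GiB (BIT(30)) in
 * size and fully allocated; jd_blocks is derived from the inode size
 * in filesystem blocks.
 */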
int gfs2_jdesc_check(struct gfs2_jdesc *jd)
{
	struct gfs2_inode *ip = GFS2_I(jd->jd_inode);
	struct gfs2_sbd *sdp = GFS2_SB(jd->jd_inode);
	u64 size = i_size_read(jd->jd_inode);

	if (gfs2_check_internal_file_size(jd->jd_inode, 8 << 20, BIT(30)))
		return -EIO;

	jd->jd_blocks = size >> sdp->sd_sb.sb_bsize_shift;

	if (gfs2_write_alloc_required(ip, 0, size)) {
		gfs2_consist_inode(ip);
		return -EIO;
	}

	return 0;
}

/**
 * gfs2_make_fs_rw - Turn a Read-Only FS into a Read-Write one
 * @sdp: the filesystem
 *
 * Returns: errno
 */

int gfs2_make_fs_rw(struct gfs2_sbd *sdp)
{
	struct gfs2_inode *ip = GFS2_I(sdp->sd_jdesc->jd_inode);
	struct gfs2_glock *j_gl = ip->i_gl;
	struct gfs2_log_header_host head;
	int error;

	j_gl->gl_ops->go_inval(j_gl, DIO_METADATA);
	if (gfs2_withdrawn(sdp))
		return -EIO;

	error = gfs2_find_jhead(sdp->sd_jdesc, &head, false);
	if (error) {
		gfs2_consist(sdp);
		return error;
	}

	if (!(head.lh_flags & GFS2_LOG_HEAD_UNMOUNT)) {
		gfs2_consist(sdp);
		return -EIO;
	}

	/*  Initialize the head of the log  */
	sdp->sd_log_sequence = head.lh_sequence + 1;
	gfs2_log_pointers_init(sdp, head.lh_blkno);

	error = gfs2_quota_init(sdp);
	if (!error && gfs2_withdrawn(sdp))
		error = -EIO;
	if (!error)
		set_bit(SDF_JOURNAL_LIVE, &sdp->sd_flags);
	return error;
}

void gfs2_statfs_change_in(struct gfs2_statfs_change_host *sc, const void *buf)
{
	const struct gfs2_statfs_change *str = buf;

	sc->sc_total = be64_to_cpu(str->sc_total);
	sc->sc_free = be64_to_cpu(str->sc_free);
	sc->sc_dinodes = be64_to_cpu(str->sc_dinodes);
}

void gfs2_statfs_change_out(const struct gfs2_statfs_change_host *sc, void *buf)
{
	struct gfs2_statfs_change *str = buf;

	str->sc_total = cpu_to_be64(sc->sc_total);
	str->sc_free = cpu_to_be64(sc->sc_free);
	str->sc_dinodes = cpu_to_be64(sc->sc_dinodes);
}

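/*
 * gfs2_statfs_init - read the statfs files into memory at mount time
 *
 * Spectator mounts only read the master copy; other mounts also read
 * this node's local statfs change file.
 */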
int gfs2_statfs_init(struct gfs2_sbd *sdp)
{
	struct gfs2_inode *m_ip = GFS2_I(sdp->sd_statfs_inode);
	struct gfs2_statfs_change_host *m_sc = &sdp->sd_statfs_master;
	struct gfs2_inode *l_ip = GFS2_I(sdp->sd_sc_inode);
	struct gfs2_statfs_change_host *l_sc = &sdp->sd_statfs_local;
	struct buffer_head *m_bh, *l_bh;
	struct gfs2_holder gh;
	int error;

	error = gfs2_glock_nq_init(m_ip->i_gl, LM_ST_EXCLUSIVE, GL_NOCACHE,
				   &gh);
	if (error)
		return error;

	error = gfs2_meta_inode_buffer(m_ip, &m_bh);
	if (error)
		goto out;

	if (sdp->sd_args.ar_spectator) {
		spin_lock(&sdp->sd_statfs_spin);
		gfs2_statfs_change_in(m_sc, m_bh->b_data +
				      sizeof(struct gfs2_dinode));
		spin_unlock(&sdp->sd_statfs_spin);
	} else {
		error = gfs2_meta_inode_buffer(l_ip, &l_bh);
		if (error)
			goto out_m_bh;

		spin_lock(&sdp->sd_statfs_spin);
		gfs2_statfs_change_in(m_sc, m_bh->b_data +
				      sizeof(struct gfs2_dinode));
		gfs2_statfs_change_in(l_sc, l_bh->b_data +
				      sizeof(struct gfs2_dinode));
		spin_unlock(&sdp->sd_statfs_spin);

		brelse(l_bh);
	}

out_m_bh:
	brelse(m_bh);
out:
	gfs2_glock_dq_uninit(&gh);
	return 0;
}

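/*
 * gfs2_statfs_change - record a local statfs delta
 *
 * With the statfs_percent mount option, wake the statfs sync thread
 * once 100 * |accumulated local free-space change| reaches
 * master_free * percent.  For example (illustrative numbers only),
 * with statfs_percent=1 and a master free count of 100000 blocks, an
 * accumulated local change of 1000 blocks triggers a sync, since
 * 100 * 1000 >= 100000 * 1.
 */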
void gfs2_statfs_change(struct gfs2_sbd *sdp, s64 total, s64 free,
			s64 dinodes)
{
	struct gfs2_inode *l_ip = GFS2_I(sdp->sd_sc_inode);
	struct gfs2_statfs_change_host *l_sc = &sdp->sd_statfs_local;
	struct gfs2_statfs_change_host *m_sc = &sdp->sd_statfs_master;
	struct buffer_head *l_bh;
	s64 x, y;
	int need_sync = 0;
	int error;

	error = gfs2_meta_inode_buffer(l_ip, &l_bh);
	if (error)
		return;

	gfs2_trans_add_meta(l_ip->i_gl, l_bh);

	spin_lock(&sdp->sd_statfs_spin);
	l_sc->sc_total += total;
	l_sc->sc_free += free;
	l_sc->sc_dinodes += dinodes;
	gfs2_statfs_change_out(l_sc, l_bh->b_data + sizeof(struct gfs2_dinode));
	if (sdp->sd_args.ar_statfs_percent) {
		x = 100 * l_sc->sc_free;
		y = m_sc->sc_free * sdp->sd_args.ar_statfs_percent;
		if (x >= y || x <= -y)
			need_sync = 1;
	}
	spin_unlock(&sdp->sd_statfs_spin);

	brelse(l_bh);
	if (need_sync)
		gfs2_wake_up_statfs(sdp);
}

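/*
 * update_statfs - fold the local statfs changes into the master file
 *
 * Adds both buffers to the current transaction (the caller, e.g.
 * gfs2_statfs_sync(), must have one open), then zeroes the local
 * change file.
 */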
void update_statfs(struct gfs2_sbd *sdp, struct buffer_head *m_bh,
		   struct buffer_head *l_bh)
{
	struct gfs2_inode *m_ip = GFS2_I(sdp->sd_statfs_inode);
	struct gfs2_inode *l_ip = GFS2_I(sdp->sd_sc_inode);
	struct gfs2_statfs_change_host *m_sc = &sdp->sd_statfs_master;
	struct gfs2_statfs_change_host *l_sc = &sdp->sd_statfs_local;

	gfs2_trans_add_meta(l_ip->i_gl, l_bh);
	gfs2_trans_add_meta(m_ip->i_gl, m_bh);

	spin_lock(&sdp->sd_statfs_spin);
	m_sc->sc_total += l_sc->sc_total;
	m_sc->sc_free += l_sc->sc_free;
	m_sc->sc_dinodes += l_sc->sc_dinodes;
	memset(l_sc, 0, sizeof(struct gfs2_statfs_change));
	memset(l_bh->b_data + sizeof(struct gfs2_dinode),
	       0, sizeof(struct gfs2_statfs_change));
	gfs2_statfs_change_out(m_sc, m_bh->b_data + sizeof(struct gfs2_dinode));
	spin_unlock(&sdp->sd_statfs_spin);
}

int gfs2_statfs_sync(struct super_block *sb, int type)
{
	struct gfs2_sbd *sdp = sb->s_fs_info;
	struct gfs2_inode *m_ip = GFS2_I(sdp->sd_statfs_inode);
	struct gfs2_inode *l_ip = GFS2_I(sdp->sd_sc_inode);
	struct gfs2_statfs_change_host *m_sc = &sdp->sd_statfs_master;
	struct gfs2_statfs_change_host *l_sc = &sdp->sd_statfs_local;
	struct gfs2_holder gh;
	struct buffer_head *m_bh, *l_bh;
	int error;

	sb_start_write(sb);
	error = gfs2_glock_nq_init(m_ip->i_gl, LM_ST_EXCLUSIVE, GL_NOCACHE,
				   &gh);
	if (error)
		goto out;

	error = gfs2_meta_inode_buffer(m_ip, &m_bh);
	if (error)
		goto out_unlock;

	spin_lock(&sdp->sd_statfs_spin);
	gfs2_statfs_change_in(m_sc, m_bh->b_data +
			      sizeof(struct gfs2_dinode));
	if (!l_sc->sc_total && !l_sc->sc_free && !l_sc->sc_dinodes) {
		spin_unlock(&sdp->sd_statfs_spin);
		goto out_bh;
	}
	spin_unlock(&sdp->sd_statfs_spin);

	error = gfs2_meta_inode_buffer(l_ip, &l_bh);
	if (error)
		goto out_bh;

	error = gfs2_trans_begin(sdp, 2 * RES_DINODE, 0);
	if (error)
		goto out_bh2;

	update_statfs(sdp, m_bh, l_bh);
	sdp->sd_statfs_force_sync = 0;

	gfs2_trans_end(sdp);

out_bh2:
	brelse(l_bh);
out_bh:
	brelse(m_bh);
out_unlock:
	gfs2_glock_dq_uninit(&gh);
out:
	sb_end_write(sb);
	return error;
}

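/* Per-journal glock holder used while verifying that all journals are clean. */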
struct lfcc {
	struct list_head list;
	struct gfs2_holder gh;
};

/**
 * gfs2_lock_fs_check_clean - Stop all writes to the FS and check that all
 *                            journals are clean
 * @sdp: the file system
 *
 * Returns: errno
 */

static int gfs2_lock_fs_check_clean(struct gfs2_sbd *sdp)
{
	struct gfs2_inode *ip;
	struct gfs2_jdesc *jd;
	struct lfcc *lfcc;
	LIST_HEAD(list);
	struct gfs2_log_header_host lh;
	int error;

	list_for_each_entry(jd, &sdp->sd_jindex_list, jd_list) {
		lfcc = kmalloc(sizeof(struct lfcc), GFP_KERNEL);
		if (!lfcc) {
			error = -ENOMEM;
			goto out;
		}
		ip = GFS2_I(jd->jd_inode);
		error = gfs2_glock_nq_init(ip->i_gl, LM_ST_SHARED, 0, &lfcc->gh);
		if (error) {
			kfree(lfcc);
			goto out;
		}
		list_add(&lfcc->list, &list);
	}

	error = gfs2_glock_nq_init(sdp->sd_freeze_gl, LM_ST_EXCLUSIVE,
				   LM_FLAG_NOEXP, &sdp->sd_freeze_gh);
	if (error)
		goto out;

	list_for_each_entry(jd, &sdp->sd_jindex_list, jd_list) {
		error = gfs2_jdesc_check(jd);
		if (error)
			break;
		error = gfs2_find_jhead(jd, &lh, false);
		if (error)
			break;
		if (!(lh.lh_flags & GFS2_LOG_HEAD_UNMOUNT)) {
			error = -EBUSY;
			break;
		}
	}

	if (error)
		gfs2_freeze_unlock(&sdp->sd_freeze_gh);

out:
	while (!list_empty(&list)) {
		lfcc = list_first_entry(&list, struct lfcc, list);
		list_del(&lfcc->list);
		gfs2_glock_dq_uninit(&lfcc->gh);
		kfree(lfcc);
	}
	return error;
}

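/*
 * gfs2_dinode_out - serialize an in-core inode into the on-disk
 * (big-endian) dinode layout at the start of @buf.
 */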
void gfs2_dinode_out(const struct gfs2_inode *ip, void *buf)
{
	struct gfs2_dinode *str = buf;

	str->di_header.mh_magic = cpu_to_be32(GFS2_MAGIC);
	str->di_header.mh_type = cpu_to_be32(GFS2_METATYPE_DI);
	str->di_header.mh_format = cpu_to_be32(GFS2_FORMAT_DI);
	str->di_num.no_addr = cpu_to_be64(ip->i_no_addr);
	str->di_num.no_formal_ino = cpu_to_be64(ip->i_no_formal_ino);
	str->di_mode = cpu_to_be32(ip->i_inode.i_mode);
	str->di_uid = cpu_to_be32(i_uid_read(&ip->i_inode));
	str->di_gid = cpu_to_be32(i_gid_read(&ip->i_inode));
	str->di_nlink = cpu_to_be32(ip->i_inode.i_nlink);
	str->di_size = cpu_to_be64(i_size_read(&ip->i_inode));
	str->di_blocks = cpu_to_be64(gfs2_get_inode_blocks(&ip->i_inode));
	str->di_atime = cpu_to_be64(ip->i_inode.i_atime.tv_sec);
	str->di_mtime = cpu_to_be64(ip->i_inode.i_mtime.tv_sec);
	str->di_ctime = cpu_to_be64(ip->i_inode.i_ctime.tv_sec);

	str->di_goal_meta = cpu_to_be64(ip->i_goal);
	str->di_goal_data = cpu_to_be64(ip->i_goal);
	str->di_generation = cpu_to_be64(ip->i_generation);

	str->di_flags = cpu_to_be32(ip->i_diskflags);
	str->di_height = cpu_to_be16(ip->i_height);
	str->di_payload_format = cpu_to_be32(S_ISDIR(ip->i_inode.i_mode) &&
					     !(ip->i_diskflags & GFS2_DIF_EXHASH) ?
					     GFS2_FORMAT_DE : 0);
	str->di_depth = cpu_to_be16(ip->i_depth);
	str->di_entries = cpu_to_be32(ip->i_entries);

	str->di_eattr = cpu_to_be64(ip->i_eattr);
	str->di_atime_nsec = cpu_to_be32(ip->i_inode.i_atime.tv_nsec);
	str->di_mtime_nsec = cpu_to_be32(ip->i_inode.i_mtime.tv_nsec);
	str->di_ctime_nsec = cpu_to_be32(ip->i_inode.i_ctime.tv_nsec);
}

/**
 * gfs2_write_inode - Make sure the inode is stable on the disk
 * @inode: The inode
 * @wbc: The writeback control structure
 *
 * Returns: errno
 */

static int gfs2_write_inode(struct inode *inode, struct writeback_control *wbc)
{
	struct gfs2_inode *ip = GFS2_I(inode);
	struct gfs2_sbd *sdp = GFS2_SB(inode);
	struct address_space *metamapping = gfs2_glock2aspace(ip->i_gl);
	struct backing_dev_info *bdi = inode_to_bdi(metamapping->host);
	int ret = 0;
	bool flush_all = (wbc->sync_mode == WB_SYNC_ALL || gfs2_is_jdata(ip));

	if (flush_all)
		gfs2_log_flush(GFS2_SB(inode), ip->i_gl,
			       GFS2_LOG_HEAD_FLUSH_NORMAL |
			       GFS2_LFC_WRITE_INODE);
	if (bdi->wb.dirty_exceeded)
		gfs2_ail1_flush(sdp, wbc);
	else
		filemap_fdatawrite(metamapping);
	if (flush_all)
		ret = filemap_fdatawait(metamapping);
	if (ret)
		mark_inode_dirty_sync(inode);
	else {
		spin_lock(&inode->i_lock);
		if (!(inode->i_flags & I_DIRTY))
			gfs2_ordered_del_inode(ip);
		spin_unlock(&inode->i_lock);
	}
	return ret;
}

/**
 * gfs2_dirty_inode - check for atime updates
 * @inode: The inode in question
 * @flags: The type of dirty
 *
 * Unfortunately it can be called under any combination of inode
 * glock and transaction lock, so we have to check carefully.
 *
 * At the moment this deals only with atime - it should be possible
 * to expand that role in future, once a review of the locking has
 * been carried out.
 */

static void gfs2_dirty_inode(struct inode *inode, int flags)
{
	struct gfs2_inode *ip = GFS2_I(inode);
	struct gfs2_sbd *sdp = GFS2_SB(inode);
	struct buffer_head *bh;
	struct gfs2_holder gh;
	int need_unlock = 0;
	int need_endtrans = 0;
	int ret;

	if (!(flags & I_DIRTY_INODE))
		return;
	if (unlikely(gfs2_withdrawn(sdp)))
		return;
	if (!gfs2_glock_is_locked_by_me(ip->i_gl)) {
		ret = gfs2_glock_nq_init(ip->i_gl, LM_ST_EXCLUSIVE, 0, &gh);
		if (ret) {
			fs_err(sdp, "dirty_inode: glock %d\n", ret);
			gfs2_dump_glock(NULL, ip->i_gl, true);
			return;
		}
		need_unlock = 1;
	} else if (WARN_ON_ONCE(ip->i_gl->gl_state != LM_ST_EXCLUSIVE))
		return;

	if (current->journal_info == NULL) {
		ret = gfs2_trans_begin(sdp, RES_DINODE, 0);
		if (ret) {
			fs_err(sdp, "dirty_inode: gfs2_trans_begin %d\n", ret);
			goto out;
		}
		need_endtrans = 1;
	}

	ret = gfs2_meta_inode_buffer(ip, &bh);
	if (ret == 0) {
		gfs2_trans_add_meta(ip->i_gl, bh);
		gfs2_dinode_out(ip, bh->b_data);
		brelse(bh);
	}

	if (need_endtrans)
		gfs2_trans_end(sdp);
out:
	if (need_unlock)
		gfs2_glock_dq_uninit(&gh);
}

/**
 * gfs2_make_fs_ro - Turn a Read-Write FS into a Read-Only one
 * @sdp: the filesystem
 *
 * Returns: errno
 */

int gfs2_make_fs_ro(struct gfs2_sbd *sdp)
{
	int error = 0;
	int log_write_allowed = test_bit(SDF_JOURNAL_LIVE, &sdp->sd_flags);

	gfs2_flush_delete_work(sdp);
	if (!log_write_allowed && current == sdp->sd_quotad_process)
		fs_warn(sdp, "The quotad daemon is withdrawing.\n");
	else if (sdp->sd_quotad_process)
		kthread_stop(sdp->sd_quotad_process);
	sdp->sd_quotad_process = NULL;

	if (!log_write_allowed && current == sdp->sd_logd_process)
		fs_warn(sdp, "The logd daemon is withdrawing.\n");
	else if (sdp->sd_logd_process)
		kthread_stop(sdp->sd_logd_process);
	sdp->sd_logd_process = NULL;

	if (log_write_allowed) {
		gfs2_quota_sync(sdp->sd_vfs, 0);
		gfs2_statfs_sync(sdp->sd_vfs, 0);

		gfs2_log_flush(sdp, NULL, GFS2_LOG_HEAD_FLUSH_SHUTDOWN |
			       GFS2_LFC_MAKE_FS_RO);
		wait_event(sdp->sd_reserving_log_wait,
			   atomic_read(&sdp->sd_reserving_log) == 0);
		gfs2_assert_warn(sdp, atomic_read(&sdp->sd_log_blks_free) ==
				 sdp->sd_jdesc->jd_blocks);
	} else {
		wait_event_timeout(sdp->sd_reserving_log_wait,
				   atomic_read(&sdp->sd_reserving_log) == 0,
				   HZ * 5);
	}
	gfs2_quota_cleanup(sdp);

	if (!log_write_allowed)
		sdp->sd_vfs->s_flags |= SB_RDONLY;

	return error;
}

/**
 * gfs2_put_super - Unmount the filesystem
 * @sb: The VFS superblock
 *
 */

static void gfs2_put_super(struct super_block *sb)
{
	struct gfs2_sbd *sdp = sb->s_fs_info;
	int error;
	struct gfs2_jdesc *jd;

	/* No more recovery requests */
	set_bit(SDF_NORECOVERY, &sdp->sd_flags);
	smp_mb();

	/* Wait on outstanding recovery */
restart:
	spin_lock(&sdp->sd_jindex_spin);
	list_for_each_entry(jd, &sdp->sd_jindex_list, jd_list) {
		if (!test_bit(JDF_RECOVERY, &jd->jd_flags))
			continue;
		spin_unlock(&sdp->sd_jindex_spin);
		wait_on_bit(&jd->jd_flags, JDF_RECOVERY,
			    TASK_UNINTERRUPTIBLE);
		goto restart;
	}
	spin_unlock(&sdp->sd_jindex_spin);

	if (!sb_rdonly(sb)) {
		error = gfs2_make_fs_ro(sdp);
		if (error)
			gfs2_io_error(sdp);
	}
	WARN_ON(gfs2_withdrawing(sdp));

	/*  At this point, we're through modifying the disk  */

	/*  Release stuff  */

	iput(sdp->sd_jindex);
	iput(sdp->sd_statfs_inode);
	iput(sdp->sd_rindex);
	iput(sdp->sd_quota_inode);

	gfs2_glock_put(sdp->sd_rename_gl);
	gfs2_glock_put(sdp->sd_freeze_gl);

	if (!sdp->sd_args.ar_spectator) {
		if (gfs2_holder_initialized(&sdp->sd_journal_gh))
			gfs2_glock_dq_uninit(&sdp->sd_journal_gh);
		if (gfs2_holder_initialized(&sdp->sd_jinode_gh))
			gfs2_glock_dq_uninit(&sdp->sd_jinode_gh);
		gfs2_glock_dq_uninit(&sdp->sd_sc_gh);
		gfs2_glock_dq_uninit(&sdp->sd_qc_gh);
		free_local_statfs_inodes(sdp);
		iput(sdp->sd_qc_inode);
	}

	gfs2_glock_dq_uninit(&sdp->sd_live_gh);
	gfs2_clear_rgrpd(sdp);
	gfs2_jindex_free(sdp);
	/*  Take apart glock structures and buffer lists  */
	gfs2_gl_hash_clear(sdp);
	truncate_inode_pages_final(&sdp->sd_aspace);
	gfs2_delete_debugfs_file(sdp);
	/*  Unmount the locking protocol  */
	gfs2_lm_unmount(sdp);

	/*  At this point, we're through participating in the lockspace  */
	gfs2_sys_fs_del(sdp);
	free_sbd(sdp);
}

/**
 * gfs2_sync_fs - sync the filesystem
 * @sb: the superblock
 * @wait: true to wait for completion
 *
 * Flushes the log to disk.
 */

static int gfs2_sync_fs(struct super_block *sb, int wait)
{
	struct gfs2_sbd *sdp = sb->s_fs_info;

	gfs2_quota_sync(sb, -1);
	if (wait)
		gfs2_log_flush(sdp, NULL, GFS2_LOG_HEAD_FLUSH_NORMAL |
			       GFS2_LFC_SYNC_FS);
	return sdp->sd_log_error;
}

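/*
 * gfs2_freeze_func - work function that completes a freeze
 *
 * Queued when the freeze glock is demoted (typically because another
 * node has frozen the filesystem); blocks until the freeze lock can be
 * reacquired, then thaws this node and clears SDF_FS_FROZEN.
 */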
void gfs2_freeze_func(struct work_struct *work)
{
	int error;
	struct gfs2_holder freeze_gh;
	struct gfs2_sbd *sdp = container_of(work, struct gfs2_sbd, sd_freeze_work);
	struct super_block *sb = sdp->sd_vfs;

	atomic_inc(&sb->s_active);
	error = gfs2_freeze_lock(sdp, &freeze_gh, 0);
	if (error) {
		gfs2_assert_withdraw(sdp, 0);
	} else {
		atomic_set(&sdp->sd_freeze_state, SFS_UNFROZEN);
		error = thaw_super(sb);
		if (error) {
			fs_info(sdp, "GFS2: couldn't thaw filesystem: %d\n",
				error);
			gfs2_assert_withdraw(sdp, 0);
		}
		gfs2_freeze_unlock(&freeze_gh);
	}
	deactivate_super(sb);
	clear_bit_unlock(SDF_FS_FROZEN, &sdp->sd_flags);
	wake_up_bit(&sdp->sd_flags, SDF_FS_FROZEN);
}

/**
 * gfs2_freeze - prevent further writes to the filesystem
 * @sb: the VFS structure for the filesystem
 *
 */

static int gfs2_freeze(struct super_block *sb)
{
	struct gfs2_sbd *sdp = sb->s_fs_info;
	int error;

	mutex_lock(&sdp->sd_freeze_mutex);
	if (atomic_read(&sdp->sd_freeze_state) != SFS_UNFROZEN) {
		error = -EBUSY;
		goto out;
	}

	for (;;) {
		if (gfs2_withdrawn(sdp)) {
			error = -EINVAL;
			goto out;
		}

		error = gfs2_lock_fs_check_clean(sdp);
		if (!error)
			break;

		if (error == -EBUSY)
			fs_err(sdp, "waiting for recovery before freeze\n");
		else if (error == -EIO) {
			fs_err(sdp, "Fatal IO error: cannot freeze gfs2 due "
			       "to recovery error.\n");
			goto out;
		} else {
			fs_err(sdp, "error freezing FS: %d\n", error);
		}
		fs_err(sdp, "retrying...\n");
		msleep(1000);
	}
	set_bit(SDF_FS_FROZEN, &sdp->sd_flags);
out:
	mutex_unlock(&sdp->sd_freeze_mutex);
	return error;
}

/**
 * gfs2_unfreeze - reallow writes to the filesystem
 * @sb: the VFS structure for the filesystem
 *
 */

static int gfs2_unfreeze(struct super_block *sb)
{
	struct gfs2_sbd *sdp = sb->s_fs_info;

	mutex_lock(&sdp->sd_freeze_mutex);
	if (atomic_read(&sdp->sd_freeze_state) != SFS_FROZEN ||
	    !gfs2_holder_initialized(&sdp->sd_freeze_gh)) {
		mutex_unlock(&sdp->sd_freeze_mutex);
		return -EINVAL;
	}

	gfs2_freeze_unlock(&sdp->sd_freeze_gh);
	mutex_unlock(&sdp->sd_freeze_mutex);
	return wait_on_bit(&sdp->sd_flags, SDF_FS_FROZEN, TASK_INTERRUPTIBLE);
}

/**
 * statfs_slow_fill - fill in the sc for a given RG
 * @rgd: the RG
 * @sc: the sc structure
 *
 * Returns: 0 on success
 */

static int statfs_slow_fill(struct gfs2_rgrpd *rgd,
			    struct gfs2_statfs_change_host *sc)
{
	gfs2_rgrp_verify(rgd);
	sc->sc_total += rgd->rd_data;
	sc->sc_free += rgd->rd_free;
	sc->sc_dinodes += rgd->rd_dinodes;
	return 0;
}

/**
 * gfs2_statfs_slow - Stat a filesystem using asynchronous locking
 * @sdp: the filesystem
 * @sc: the sc info that will be returned
 *
 * Any error (other than a signal) will cause this routine to fall back
 * to the synchronous version.
 *
 * FIXME: This really shouldn't busy wait like this.
 *
 * Returns: errno
 */

static int gfs2_statfs_slow(struct gfs2_sbd *sdp, struct gfs2_statfs_change_host *sc)
{
	struct gfs2_rgrpd *rgd_next;
	struct gfs2_holder *gha, *gh;
	unsigned int slots = 64;
	unsigned int x;
	int done;
	int error = 0, err;

	memset(sc, 0, sizeof(struct gfs2_statfs_change_host));
	gha = kmalloc_array(slots, sizeof(struct gfs2_holder), GFP_KERNEL);
	if (!gha)
		return -ENOMEM;
	for (x = 0; x < slots; x++)
		gfs2_holder_mark_uninitialized(gha + x);

	rgd_next = gfs2_rgrpd_get_first(sdp);

	for (;;) {
		done = 1;

		for (x = 0; x < slots; x++) {
			gh = gha + x;

			if (gfs2_holder_initialized(gh) && gfs2_glock_poll(gh)) {
				err = gfs2_glock_wait(gh);
				if (err) {
					gfs2_holder_uninit(gh);
					error = err;
				} else {
					if (!error) {
						struct gfs2_rgrpd *rgd =
							gfs2_glock2rgrp(gh->gh_gl);

						error = statfs_slow_fill(rgd, sc);
					}
					gfs2_glock_dq_uninit(gh);
				}
			}

			if (gfs2_holder_initialized(gh))
				done = 0;
			else if (rgd_next && !error) {
				error = gfs2_glock_nq_init(rgd_next->rd_gl,
							   LM_ST_SHARED,
							   GL_ASYNC,
							   gh);
				rgd_next = gfs2_rgrpd_get_next(rgd_next);
				done = 0;
			}

			if (signal_pending(current))
				error = -ERESTARTSYS;
		}

		if (done)
			break;

		yield();
	}

	kfree(gha);
	return error;
}

882 * gfs2_statfs_i - Do a statfs
883 * @sdp: the filesystem
884 * @sg: the sg structure
885 *
886 * Returns: errno
887 */
888
889static int gfs2_statfs_i(struct gfs2_sbd *sdp, struct gfs2_statfs_change_host *sc)
890{
891	struct gfs2_statfs_change_host *m_sc = &sdp->sd_statfs_master;
892	struct gfs2_statfs_change_host *l_sc = &sdp->sd_statfs_local;
893
894	spin_lock(&sdp->sd_statfs_spin);
895
896	*sc = *m_sc;
897	sc->sc_total += l_sc->sc_total;
898	sc->sc_free += l_sc->sc_free;
899	sc->sc_dinodes += l_sc->sc_dinodes;
900
901	spin_unlock(&sdp->sd_statfs_spin);
902
903	if (sc->sc_free < 0)
904		sc->sc_free = 0;
905	if (sc->sc_free > sc->sc_total)
906		sc->sc_free = sc->sc_total;
907	if (sc->sc_dinodes < 0)
908		sc->sc_dinodes = 0;
909
910	return 0;
911}
912
/**
 * gfs2_statfs - Gather and return stats about the filesystem
 * @dentry: The dentry to stat
 * @buf: The buffer
 *
 * Returns: 0 on success or error code
 */

static int gfs2_statfs(struct dentry *dentry, struct kstatfs *buf)
{
	struct super_block *sb = dentry->d_sb;
	struct gfs2_sbd *sdp = sb->s_fs_info;
	struct gfs2_statfs_change_host sc;
	int error;

	error = gfs2_rindex_update(sdp);
	if (error)
		return error;

	if (gfs2_tune_get(sdp, gt_statfs_slow))
		error = gfs2_statfs_slow(sdp, &sc);
	else
		error = gfs2_statfs_i(sdp, &sc);

	if (error)
		return error;

	buf->f_type = GFS2_MAGIC;
	buf->f_bsize = sdp->sd_sb.sb_bsize;
	buf->f_blocks = sc.sc_total;
	buf->f_bfree = sc.sc_free;
	buf->f_bavail = sc.sc_free;
	buf->f_files = sc.sc_dinodes + sc.sc_free;
	buf->f_ffree = sc.sc_free;
	buf->f_namelen = GFS2_FNAMESIZE;

	return 0;
}

/**
 * gfs2_drop_inode - Drop an inode (test for remote unlink)
 * @inode: The inode to drop
 *
 * If we've received a callback on an iopen lock then it's because a
 * remote node tried to deallocate the inode but failed due to this node
 * still having the inode open. Here we mark the link count zero
 * since we know that it must have reached zero if the GLF_DEMOTE flag
 * is set on the iopen glock. If we didn't do a disk read since the
 * remote node removed the final link then we might otherwise miss
 * this event. This check ensures that this node will deallocate the
 * inode's blocks, or alternatively pass the baton on to another
 * node for later deallocation.
 */

static int gfs2_drop_inode(struct inode *inode)
{
	struct gfs2_inode *ip = GFS2_I(inode);

	if (!test_bit(GIF_FREE_VFS_INODE, &ip->i_flags) &&
	    inode->i_nlink &&
	    gfs2_holder_initialized(&ip->i_iopen_gh)) {
		struct gfs2_glock *gl = ip->i_iopen_gh.gh_gl;
		if (test_bit(GLF_DEMOTE, &gl->gl_flags))
			clear_nlink(inode);
	}

	/*
	 * When, under memory pressure, an inode's link count has dropped to
	 * zero, defer deleting the inode to the delete workqueue.  This avoids
	 * calling into DLM under memory pressure, which can deadlock.
	 */
	if (!inode->i_nlink &&
	    unlikely(current->flags & PF_MEMALLOC) &&
	    gfs2_holder_initialized(&ip->i_iopen_gh)) {
		struct gfs2_glock *gl = ip->i_iopen_gh.gh_gl;

		gfs2_glock_hold(gl);
		if (!gfs2_queue_delete_work(gl, 0))
			gfs2_glock_queue_put(gl);
		return false;
	}

	return generic_drop_inode(inode);
}

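/*
 * Return 1 if @d1 is @d2 or lies beneath it in the dentry tree; used by
 * gfs2_show_options() to detect a mount of the hidden metafs subtree.
 */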
static int is_ancestor(const struct dentry *d1, const struct dentry *d2)
{
	do {
		if (d1 == d2)
			return 1;
		d1 = d1->d_parent;
	} while (!IS_ROOT(d1));
	return 0;
}

/**
 * gfs2_show_options - Show mount options for /proc/mounts
 * @s: seq_file structure
 * @root: root of this (sub)tree
 *
 * Returns: 0 on success or error code
 */

static int gfs2_show_options(struct seq_file *s, struct dentry *root)
{
	struct gfs2_sbd *sdp = root->d_sb->s_fs_info;
	struct gfs2_args *args = &sdp->sd_args;
	unsigned int logd_secs, statfs_slow, statfs_quantum, quota_quantum;

	spin_lock(&sdp->sd_tune.gt_spin);
	logd_secs = sdp->sd_tune.gt_logd_secs;
	quota_quantum = sdp->sd_tune.gt_quota_quantum;
	statfs_quantum = sdp->sd_tune.gt_statfs_quantum;
	statfs_slow = sdp->sd_tune.gt_statfs_slow;
	spin_unlock(&sdp->sd_tune.gt_spin);

	if (is_ancestor(root, sdp->sd_master_dir))
		seq_puts(s, ",meta");
	if (args->ar_lockproto[0])
		seq_show_option(s, "lockproto", args->ar_lockproto);
	if (args->ar_locktable[0])
		seq_show_option(s, "locktable", args->ar_locktable);
	if (args->ar_hostdata[0])
		seq_show_option(s, "hostdata", args->ar_hostdata);
	if (args->ar_spectator)
		seq_puts(s, ",spectator");
	if (args->ar_localflocks)
		seq_puts(s, ",localflocks");
	if (args->ar_debug)
		seq_puts(s, ",debug");
	if (args->ar_posix_acl)
		seq_puts(s, ",acl");
	if (args->ar_quota != GFS2_QUOTA_DEFAULT) {
		char *state;
		switch (args->ar_quota) {
		case GFS2_QUOTA_OFF:
			state = "off";
			break;
		case GFS2_QUOTA_ACCOUNT:
			state = "account";
			break;
		case GFS2_QUOTA_ON:
			state = "on";
			break;
		default:
			state = "unknown";
			break;
		}
		seq_printf(s, ",quota=%s", state);
	}
	if (args->ar_suiddir)
		seq_puts(s, ",suiddir");
	if (args->ar_data != GFS2_DATA_DEFAULT) {
		char *state;
		switch (args->ar_data) {
		case GFS2_DATA_WRITEBACK:
			state = "writeback";
			break;
		case GFS2_DATA_ORDERED:
			state = "ordered";
			break;
		default:
			state = "unknown";
			break;
		}
		seq_printf(s, ",data=%s", state);
	}
	if (args->ar_discard)
		seq_puts(s, ",discard");
	if (logd_secs != 30)
		seq_printf(s, ",commit=%d", logd_secs);
	if (statfs_quantum != 30)
		seq_printf(s, ",statfs_quantum=%d", statfs_quantum);
	else if (statfs_slow)
		seq_puts(s, ",statfs_quantum=0");
	if (quota_quantum != 60)
		seq_printf(s, ",quota_quantum=%d", quota_quantum);
	if (args->ar_statfs_percent)
		seq_printf(s, ",statfs_percent=%d", args->ar_statfs_percent);
	if (args->ar_errors != GFS2_ERRORS_DEFAULT) {
		const char *state;

		switch (args->ar_errors) {
		case GFS2_ERRORS_WITHDRAW:
			state = "withdraw";
			break;
		case GFS2_ERRORS_PANIC:
			state = "panic";
			break;
		default:
			state = "unknown";
			break;
		}
		seq_printf(s, ",errors=%s", state);
	}
	if (test_bit(SDF_NOBARRIERS, &sdp->sd_flags))
		seq_puts(s, ",nobarrier");
	if (test_bit(SDF_DEMOTE, &sdp->sd_flags))
		seq_puts(s, ",demote_interface_used");
	if (args->ar_rgrplvb)
		seq_puts(s, ",rgrplvb");
	if (args->ar_loccookie)
		seq_puts(s, ",loccookie");
	return 0;
}

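/* Drop all cached data and metadata pages of an inode being deallocated. */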
static void gfs2_final_release_pages(struct gfs2_inode *ip)
{
	struct inode *inode = &ip->i_inode;
	struct gfs2_glock *gl = ip->i_gl;

	truncate_inode_pages(gfs2_glock2aspace(ip->i_gl), 0);
	truncate_inode_pages(&inode->i_data, 0);

	if (atomic_read(&gl->gl_revokes) == 0) {
		clear_bit(GLF_LFLUSH, &gl->gl_flags);
		clear_bit(GLF_DIRTY, &gl->gl_flags);
	}
}

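/*
 * gfs2_dinode_dealloc - free the on-disk dinode block itself
 *
 * All data, xattr and directory hash-table blocks must already have
 * been deallocated, so the inode occupies exactly one block.
 */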
static int gfs2_dinode_dealloc(struct gfs2_inode *ip)
{
	struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode);
	struct gfs2_rgrpd *rgd;
	struct gfs2_holder gh;
	int error;

	if (gfs2_get_inode_blocks(&ip->i_inode) != 1) {
		gfs2_consist_inode(ip);
		return -EIO;
	}

	error = gfs2_rindex_update(sdp);
	if (error)
		return error;

	error = gfs2_quota_hold(ip, NO_UID_QUOTA_CHANGE, NO_GID_QUOTA_CHANGE);
	if (error)
		return error;

	rgd = gfs2_blk2rgrpd(sdp, ip->i_no_addr, 1);
	if (!rgd) {
		gfs2_consist_inode(ip);
		error = -EIO;
		goto out_qs;
	}

	error = gfs2_glock_nq_init(rgd->rd_gl, LM_ST_EXCLUSIVE, 0, &gh);
	if (error)
		goto out_qs;

	error = gfs2_trans_begin(sdp, RES_RG_BIT + RES_STATFS + RES_QUOTA,
				 sdp->sd_jdesc->jd_blocks);
	if (error)
		goto out_rg_gunlock;

	gfs2_free_di(rgd, ip);

	gfs2_final_release_pages(ip);

	gfs2_trans_end(sdp);

out_rg_gunlock:
	gfs2_glock_dq_uninit(&gh);
out_qs:
	gfs2_quota_unhold(ip);
	return error;
}

/**
 * gfs2_glock_put_eventually - put a glock reference, deferring if needed
 * @gl:	The glock to put
 *
 * When under memory pressure, trigger a deferred glock put to make sure we
 * won't call into DLM and deadlock.  Otherwise, put the glock directly.
 */

static void gfs2_glock_put_eventually(struct gfs2_glock *gl)
{
	if (current->flags & PF_MEMALLOC)
		gfs2_glock_queue_put(gl);
	else
		gfs2_glock_put(gl);
}

static bool gfs2_upgrade_iopen_glock(struct inode *inode)
{
	struct gfs2_inode *ip = GFS2_I(inode);
	struct gfs2_sbd *sdp = GFS2_SB(inode);
	struct gfs2_holder *gh = &ip->i_iopen_gh;
	long timeout = 5 * HZ;
	int error;

	gh->gh_flags |= GL_NOCACHE;
	gfs2_glock_dq_wait(gh);

	/*
	 * If there are no other lock holders, we'll get the lock immediately.
	 * Otherwise, the other nodes holding the lock will be notified about
	 * our locking request.  If they don't have the inode open, they'll
	 * evict the cached inode and release the lock.  Otherwise, if they
	 * poke the inode glock, we'll take this as an indication that they
	 * still need the iopen glock and that they'll take care of deleting
	 * the inode when they're done.  As a last resort, if another node
	 * keeps holding the iopen glock without showing any activity on the
	 * inode glock, we'll eventually time out.
	 *
	 * Note that we're passing the LM_FLAG_TRY_1CB flag to the first
	 * locking request as an optimization to notify lock holders as soon as
	 * possible.  Without that flag, they'd be notified implicitly by the
	 * second locking request.
	 */

	gfs2_holder_reinit(LM_ST_EXCLUSIVE, LM_FLAG_TRY_1CB | GL_NOCACHE, gh);
	error = gfs2_glock_nq(gh);
	if (error != GLR_TRYFAILED)
		return !error;

	gfs2_holder_reinit(LM_ST_EXCLUSIVE, GL_ASYNC | GL_NOCACHE, gh);
	error = gfs2_glock_nq(gh);
	if (error)
		return false;

	timeout = wait_event_interruptible_timeout(sdp->sd_async_glock_wait,
		!test_bit(HIF_WAIT, &gh->gh_iflags) ||
		test_bit(GLF_DEMOTE, &ip->i_gl->gl_flags),
		timeout);
	if (!test_bit(HIF_HOLDER, &gh->gh_iflags)) {
		gfs2_glock_dq(gh);
		return false;
	}
	return true;
}

/**
 * evict_should_delete - determine whether the inode is eligible for deletion
 * @inode: The inode to evict
 * @gh: The glock holder to use for the inode glock
 *
 * This function determines whether the evicted inode is eligible to be deleted
 * and locks the inode glock.
 *
 * Returns: the fate of the dinode
 */
static enum dinode_demise evict_should_delete(struct inode *inode,
					      struct gfs2_holder *gh)
{
	struct gfs2_inode *ip = GFS2_I(inode);
	struct super_block *sb = inode->i_sb;
	struct gfs2_sbd *sdp = sb->s_fs_info;
	int ret;

	if (test_bit(GIF_ALLOC_FAILED, &ip->i_flags)) {
		BUG_ON(!gfs2_glock_is_locked_by_me(ip->i_gl));
		goto should_delete;
	}

	if (test_bit(GIF_DEFERRED_DELETE, &ip->i_flags))
		return SHOULD_DEFER_EVICTION;

	/* Deletes should never happen under memory pressure anymore.  */
	if (WARN_ON_ONCE(current->flags & PF_MEMALLOC))
		return SHOULD_DEFER_EVICTION;

	/* Must not read inode block until block type has been verified */
	ret = gfs2_glock_nq_init(ip->i_gl, LM_ST_EXCLUSIVE, GL_SKIP, gh);
	if (unlikely(ret)) {
		glock_clear_object(ip->i_iopen_gh.gh_gl, ip);
		ip->i_iopen_gh.gh_flags |= GL_NOCACHE;
		gfs2_glock_dq_uninit(&ip->i_iopen_gh);
		return SHOULD_DEFER_EVICTION;
	}

	if (gfs2_inode_already_deleted(ip->i_gl, ip->i_no_formal_ino))
		return SHOULD_NOT_DELETE_DINODE;
	ret = gfs2_check_blk_type(sdp, ip->i_no_addr, GFS2_BLKST_UNLINKED);
	if (ret)
		return SHOULD_NOT_DELETE_DINODE;

	if (test_bit(GIF_INVALID, &ip->i_flags)) {
		ret = gfs2_inode_refresh(ip);
		if (ret)
			return SHOULD_NOT_DELETE_DINODE;
	}

	/*
	 * The inode may have been recreated in the meantime.
	 */
	if (inode->i_nlink)
		return SHOULD_NOT_DELETE_DINODE;

should_delete:
	if (gfs2_holder_initialized(&ip->i_iopen_gh) &&
	    test_bit(HIF_HOLDER, &ip->i_iopen_gh.gh_iflags)) {
		if (!gfs2_upgrade_iopen_glock(inode)) {
			gfs2_holder_uninit(&ip->i_iopen_gh);
			return SHOULD_NOT_DELETE_DINODE;
		}
	}
	return SHOULD_DELETE_DINODE;
}

/**
 * evict_unlinked_inode - delete the pieces of an unlinked evicted inode
 * @inode: The inode to evict
 */
static int evict_unlinked_inode(struct inode *inode)
{
	struct gfs2_inode *ip = GFS2_I(inode);
	int ret;

	if (S_ISDIR(inode->i_mode) &&
	    (ip->i_diskflags & GFS2_DIF_EXHASH)) {
		ret = gfs2_dir_exhash_dealloc(ip);
		if (ret)
			goto out;
	}

	if (ip->i_eattr) {
		ret = gfs2_ea_dealloc(ip);
		if (ret)
			goto out;
	}

	if (!gfs2_is_stuffed(ip)) {
		ret = gfs2_file_dealloc(ip);
		if (ret)
			goto out;
	}

	/* We're about to clear the bitmap for the dinode, but as soon as we
	   do, gfs2_create_inode can create another inode at the same block
	   location and try to set gl_object again. We clear gl_object here so
	   that subsequent inode creates don't see an old gl_object. */
	glock_clear_object(ip->i_gl, ip);
	ret = gfs2_dinode_dealloc(ip);
	gfs2_inode_remember_delete(ip->i_gl, ip->i_no_formal_ino);
out:
	return ret;
}

/**
 * evict_linked_inode - evict an inode whose dinode has not been unlinked
 * @inode: The inode to evict
 */
static int evict_linked_inode(struct inode *inode)
{
	struct super_block *sb = inode->i_sb;
	struct gfs2_sbd *sdp = sb->s_fs_info;
	struct gfs2_inode *ip = GFS2_I(inode);
	struct address_space *metamapping;
	int ret;

	gfs2_log_flush(sdp, ip->i_gl, GFS2_LOG_HEAD_FLUSH_NORMAL |
		       GFS2_LFC_EVICT_INODE);
	metamapping = gfs2_glock2aspace(ip->i_gl);
	if (test_bit(GLF_DIRTY, &ip->i_gl->gl_flags)) {
		filemap_fdatawrite(metamapping);
		filemap_fdatawait(metamapping);
	}
	write_inode_now(inode, 1);
	gfs2_ail_flush(ip->i_gl, 0);

	ret = gfs2_trans_begin(sdp, 0, sdp->sd_jdesc->jd_blocks);
	if (ret)
		return ret;

	/* Needs to be done before glock release & also in a transaction */
	truncate_inode_pages(&inode->i_data, 0);
	truncate_inode_pages(metamapping, 0);
	gfs2_trans_end(sdp);
	return 0;
}

/**
 * gfs2_evict_inode - Remove an inode from cache
 * @inode: The inode to evict
 *
 * There are three cases to consider:
 * 1. i_nlink == 0, we are the final opener (and must deallocate)
 * 2. i_nlink == 0, we are not the final opener (and cannot deallocate)
 * 3. i_nlink > 0
 *
 * If the fs is read only, then we have to treat all cases as per #3
 * since we are unable to do any deallocation. The inode will be
 * deallocated by the next read/write node to attempt an allocation
 * in the same resource group.
 *
 * We have to (at the moment) hold the inode's main lock to cover
 * the gap between unlocking the shared lock on the iopen lock and
 * taking the exclusive lock. I'd rather do a shared -> exclusive
 * conversion on the iopen lock, but we can change that later. This
 * is safe, just less efficient.
 */

static void gfs2_evict_inode(struct inode *inode)
{
	struct super_block *sb = inode->i_sb;
	struct gfs2_sbd *sdp = sb->s_fs_info;
	struct gfs2_inode *ip = GFS2_I(inode);
	struct gfs2_holder gh;
	int ret;

	if (test_bit(GIF_FREE_VFS_INODE, &ip->i_flags)) {
		clear_inode(inode);
		return;
	}

	if (inode->i_nlink || sb_rdonly(sb))
		goto out;

	/*
	 * In case of an incomplete mount, gfs2_evict_inode() may be called for
	 * system files without having an active journal to write to.  In that
	 * case, skip the filesystem evict.
	 */
	if (!sdp->sd_jdesc)
		goto out;

	gfs2_holder_mark_uninitialized(&gh);
	ret = evict_should_delete(inode, &gh);
	if (ret == SHOULD_DEFER_EVICTION)
		goto out;
	if (ret == SHOULD_DELETE_DINODE)
		ret = evict_unlinked_inode(inode);
	else
		ret = evict_linked_inode(inode);

	if (gfs2_rs_active(&ip->i_res))
		gfs2_rs_deltree(&ip->i_res);

	if (gfs2_holder_initialized(&gh)) {
		glock_clear_object(ip->i_gl, ip);
		gfs2_glock_dq_uninit(&gh);
	}
	if (ret && ret != GLR_TRYFAILED && ret != -EROFS)
		fs_warn(sdp, "gfs2_evict_inode: %d\n", ret);
out:
	truncate_inode_pages_final(&inode->i_data);
	if (ip->i_qadata)
		gfs2_assert_warn(sdp, ip->i_qadata->qa_ref == 0);
	gfs2_rs_deltree(&ip->i_res);
	gfs2_ordered_del_inode(ip);
	clear_inode(inode);
	gfs2_dir_hash_inval(ip);
	if (gfs2_holder_initialized(&ip->i_iopen_gh)) {
		struct gfs2_glock *gl = ip->i_iopen_gh.gh_gl;

		glock_clear_object(gl, ip);
		if (test_bit(HIF_HOLDER, &ip->i_iopen_gh.gh_iflags)) {
			ip->i_iopen_gh.gh_flags |= GL_NOCACHE;
			gfs2_glock_dq(&ip->i_iopen_gh);
		}
		gfs2_glock_hold(gl);
		gfs2_holder_uninit(&ip->i_iopen_gh);
		gfs2_glock_put_eventually(gl);
	}
	if (ip->i_gl) {
		glock_clear_object(ip->i_gl, ip);
		wait_on_bit_io(&ip->i_flags, GIF_GLOP_PENDING, TASK_UNINTERRUPTIBLE);
		gfs2_glock_add_to_lru(ip->i_gl);
		gfs2_glock_put_eventually(ip->i_gl);
		rcu_assign_pointer(ip->i_gl, NULL);
	}
}

static struct inode *gfs2_alloc_inode(struct super_block *sb)
{
	struct gfs2_inode *ip;

	ip = kmem_cache_alloc(gfs2_inode_cachep, GFP_KERNEL);
	if (!ip)
		return NULL;
	ip->i_flags = 0;
	ip->i_gl = NULL;
	gfs2_holder_mark_uninitialized(&ip->i_iopen_gh);
	memset(&ip->i_res, 0, sizeof(ip->i_res));
	RB_CLEAR_NODE(&ip->i_res.rs_node);
	ip->i_rahead = 0;
	return &ip->i_inode;
}

static void gfs2_free_inode(struct inode *inode)
{
	kmem_cache_free(gfs2_inode_cachep, GFS2_I(inode));
}

void free_local_statfs_inodes(struct gfs2_sbd *sdp)
{
	struct local_statfs_inode *lsi, *safe;

	/* Run through the statfs inodes list to iput and free memory */
	list_for_each_entry_safe(lsi, safe, &sdp->sd_sc_inodes_list, si_list) {
		if (lsi->si_jid == sdp->sd_jdesc->jd_jid)
			sdp->sd_sc_inode = NULL; /* belongs to this node */
		if (lsi->si_sc_inode)
			iput(lsi->si_sc_inode);
		list_del(&lsi->si_list);
		kfree(lsi);
	}
}

struct inode *find_local_statfs_inode(struct gfs2_sbd *sdp,
				      unsigned int index)
{
	struct local_statfs_inode *lsi;

	/* Return the local (per node) statfs inode in the
	 * sdp->sd_sc_inodes_list corresponding to the 'index'. */
	list_for_each_entry(lsi, &sdp->sd_sc_inodes_list, si_list) {
		if (lsi->si_jid == index)
			return lsi->si_sc_inode;
	}
	return NULL;
}

const struct super_operations gfs2_super_ops = {
	.alloc_inode		= gfs2_alloc_inode,
	.free_inode		= gfs2_free_inode,
	.write_inode		= gfs2_write_inode,
	.dirty_inode		= gfs2_dirty_inode,
	.evict_inode		= gfs2_evict_inode,
	.put_super		= gfs2_put_super,
	.sync_fs		= gfs2_sync_fs,
	.freeze_super		= gfs2_freeze,
	.thaw_super		= gfs2_unfreeze,
	.statfs			= gfs2_statfs,
	.drop_inode		= gfs2_drop_inode,
	.show_options		= gfs2_show_options,
};
