xref: /kernel/linux/linux-6.6/fs/xfs/xfs_qm.c (revision 62306a36)
1// SPDX-License-Identifier: GPL-2.0
2/*
3 * Copyright (c) 2000-2005 Silicon Graphics, Inc.
4 * All Rights Reserved.
5 */
6#include "xfs.h"
7#include "xfs_fs.h"
8#include "xfs_shared.h"
9#include "xfs_format.h"
10#include "xfs_log_format.h"
11#include "xfs_trans_resv.h"
12#include "xfs_bit.h"
13#include "xfs_sb.h"
14#include "xfs_mount.h"
15#include "xfs_inode.h"
16#include "xfs_iwalk.h"
17#include "xfs_quota.h"
18#include "xfs_bmap.h"
19#include "xfs_bmap_util.h"
20#include "xfs_trans.h"
21#include "xfs_trans_space.h"
22#include "xfs_qm.h"
23#include "xfs_trace.h"
24#include "xfs_icache.h"
25#include "xfs_error.h"
26#include "xfs_ag.h"
27#include "xfs_ialloc.h"
28#include "xfs_log_priv.h"
29
30/*
31 * The global quota manager. There is only one of these for the entire
32 * system, _not_ one per file system. XQM keeps track of the overall
33 * quota functionality, including maintaining the freelist and hash
34 * tables of dquots.
35 */
36STATIC int	xfs_qm_init_quotainos(struct xfs_mount *mp);
37STATIC int	xfs_qm_init_quotainfo(struct xfs_mount *mp);
38
39STATIC void	xfs_qm_destroy_quotainos(struct xfs_quotainfo *qi);
40STATIC void	xfs_qm_dqfree_one(struct xfs_dquot *dqp);
41/*
42 * We use the batch lookup interface to iterate over the dquots as it
43 * currently is the only interface into the radix tree code that allows
44 * fuzzy lookups instead of exact matches.  Holding the lock over multiple
45 * operations is fine as all callers are used either during mount/umount
46 * or quotaoff.
47 */
48#define XFS_DQ_LOOKUP_BATCH	32
49
50STATIC int
51xfs_qm_dquot_walk(
52	struct xfs_mount	*mp,
53	xfs_dqtype_t		type,
54	int			(*execute)(struct xfs_dquot *dqp, void *data),
55	void			*data)
56{
57	struct xfs_quotainfo	*qi = mp->m_quotainfo;
58	struct radix_tree_root	*tree = xfs_dquot_tree(qi, type);
59	uint32_t		next_index;
60	int			last_error = 0;
61	int			skipped;
62	int			nr_found;
63
64restart:
65	skipped = 0;
66	next_index = 0;
67	nr_found = 0;
68
69	while (1) {
70		struct xfs_dquot *batch[XFS_DQ_LOOKUP_BATCH];
71		int		error;
72		int		i;
73
74		mutex_lock(&qi->qi_tree_lock);
75		nr_found = radix_tree_gang_lookup(tree, (void **)batch,
76					next_index, XFS_DQ_LOOKUP_BATCH);
77		if (!nr_found) {
78			mutex_unlock(&qi->qi_tree_lock);
79			break;
80		}
81
82		for (i = 0; i < nr_found; i++) {
83			struct xfs_dquot *dqp = batch[i];
84
85			next_index = dqp->q_id + 1;
86
87			error = execute(batch[i], data);
88			if (error == -EAGAIN) {
89				skipped++;
90				continue;
91			}
92			if (error && last_error != -EFSCORRUPTED)
93				last_error = error;
94		}
95
96		mutex_unlock(&qi->qi_tree_lock);
97
98		/* bail out if the filesystem is corrupted.  */
99		if (last_error == -EFSCORRUPTED) {
100			skipped = 0;
101			break;
102		}
103		/* we're done if id overflows back to zero */
104		if (!next_index)
105			break;
106	}
107
108	if (skipped) {
109		delay(1);
110		goto restart;
111	}
112
113	return last_error;
114}
115
116
117/*
118 * Purge a dquot from all tracking data structures and free it.
119 */
120STATIC int
121xfs_qm_dqpurge(
122	struct xfs_dquot	*dqp,
123	void			*data)
124{
125	struct xfs_quotainfo	*qi = dqp->q_mount->m_quotainfo;
126	int			error = -EAGAIN;
127
128	xfs_dqlock(dqp);
129	if ((dqp->q_flags & XFS_DQFLAG_FREEING) || dqp->q_nrefs != 0)
130		goto out_unlock;
131
132	dqp->q_flags |= XFS_DQFLAG_FREEING;
133
134	xfs_dqflock(dqp);
135
136	/*
137	 * If we are turning this type of quotas off, we don't care
138	 * about the dirty metadata sitting in this dquot. OTOH, if
139	 * we're unmounting, we do care, so we flush it and wait.
140	 */
141	if (XFS_DQ_IS_DIRTY(dqp)) {
142		struct xfs_buf	*bp = NULL;
143
144		/*
145		 * We don't care about getting disk errors here. We need
146		 * to purge this dquot anyway, so we go ahead regardless.
147		 */
148		error = xfs_qm_dqflush(dqp, &bp);
149		if (!error) {
150			error = xfs_bwrite(bp);
151			xfs_buf_relse(bp);
152		} else if (error == -EAGAIN) {
153			dqp->q_flags &= ~XFS_DQFLAG_FREEING;
154			goto out_unlock;
155		}
156		xfs_dqflock(dqp);
157	}
158
159	ASSERT(atomic_read(&dqp->q_pincount) == 0);
160	ASSERT(xlog_is_shutdown(dqp->q_logitem.qli_item.li_log) ||
161		!test_bit(XFS_LI_IN_AIL, &dqp->q_logitem.qli_item.li_flags));
162
163	xfs_dqfunlock(dqp);
164	xfs_dqunlock(dqp);
165
166	radix_tree_delete(xfs_dquot_tree(qi, xfs_dquot_type(dqp)), dqp->q_id);
167	qi->qi_dquots--;
168
169	/*
170	 * We move dquots to the freelist as soon as their reference count
171	 * hits zero, so it really should be on the freelist here.
172	 */
173	ASSERT(!list_empty(&dqp->q_lru));
174	list_lru_del(&qi->qi_lru, &dqp->q_lru);
175	XFS_STATS_DEC(dqp->q_mount, xs_qm_dquot_unused);
176
177	xfs_qm_dqdestroy(dqp);
178	return 0;
179
180out_unlock:
181	xfs_dqunlock(dqp);
182	return error;
183}
184
185/*
186 * Purge the dquot cache.
187 */
188static void
189xfs_qm_dqpurge_all(
190	struct xfs_mount	*mp)
191{
192	xfs_qm_dquot_walk(mp, XFS_DQTYPE_USER, xfs_qm_dqpurge, NULL);
193	xfs_qm_dquot_walk(mp, XFS_DQTYPE_GROUP, xfs_qm_dqpurge, NULL);
194	xfs_qm_dquot_walk(mp, XFS_DQTYPE_PROJ, xfs_qm_dqpurge, NULL);
195}
196
197/*
198 * Just destroy the quotainfo structure.
199 */
200void
201xfs_qm_unmount(
202	struct xfs_mount	*mp)
203{
204	if (mp->m_quotainfo) {
205		xfs_qm_dqpurge_all(mp);
206		xfs_qm_destroy_quotainfo(mp);
207	}
208}
209
210/*
211 * Called from the vfsops layer.
212 */
213void
214xfs_qm_unmount_quotas(
215	xfs_mount_t	*mp)
216{
217	/*
218	 * Release the dquots that root inode, et al might be holding,
219	 * before we flush quotas and blow away the quotainfo structure.
220	 */
221	ASSERT(mp->m_rootip);
222	xfs_qm_dqdetach(mp->m_rootip);
223	if (mp->m_rbmip)
224		xfs_qm_dqdetach(mp->m_rbmip);
225	if (mp->m_rsumip)
226		xfs_qm_dqdetach(mp->m_rsumip);
227
228	/*
229	 * Release the quota inodes.
230	 */
231	if (mp->m_quotainfo) {
232		if (mp->m_quotainfo->qi_uquotaip) {
233			xfs_irele(mp->m_quotainfo->qi_uquotaip);
234			mp->m_quotainfo->qi_uquotaip = NULL;
235		}
236		if (mp->m_quotainfo->qi_gquotaip) {
237			xfs_irele(mp->m_quotainfo->qi_gquotaip);
238			mp->m_quotainfo->qi_gquotaip = NULL;
239		}
240		if (mp->m_quotainfo->qi_pquotaip) {
241			xfs_irele(mp->m_quotainfo->qi_pquotaip);
242			mp->m_quotainfo->qi_pquotaip = NULL;
243		}
244	}
245}
246
247STATIC int
248xfs_qm_dqattach_one(
249	struct xfs_inode	*ip,
250	xfs_dqtype_t		type,
251	bool			doalloc,
252	struct xfs_dquot	**IO_idqpp)
253{
254	struct xfs_dquot	*dqp;
255	int			error;
256
257	ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL));
258	error = 0;
259
260	/*
261	 * See if we already have it in the inode itself. IO_idqpp is &i_udquot
262	 * or &i_gdquot. This made the code look weird, but made the logic a lot
263	 * simpler.
264	 */
265	dqp = *IO_idqpp;
266	if (dqp) {
267		trace_xfs_dqattach_found(dqp);
268		return 0;
269	}
270
271	/*
272	 * Find the dquot from somewhere. This bumps the reference count of
273	 * dquot and returns it locked.  This can return ENOENT if dquot didn't
274	 * exist on disk and we didn't ask it to allocate; ESRCH if quotas got
275	 * turned off suddenly.
276	 */
277	error = xfs_qm_dqget_inode(ip, type, doalloc, &dqp);
278	if (error)
279		return error;
280
281	trace_xfs_dqattach_get(dqp);
282
283	/*
284	 * dqget may have dropped and re-acquired the ilock, but it guarantees
285	 * that the dquot returned is the one that should go in the inode.
286	 */
287	*IO_idqpp = dqp;
288	xfs_dqunlock(dqp);
289	return 0;
290}
291
292static bool
293xfs_qm_need_dqattach(
294	struct xfs_inode	*ip)
295{
296	struct xfs_mount	*mp = ip->i_mount;
297
298	if (!XFS_IS_QUOTA_ON(mp))
299		return false;
300	if (!XFS_NOT_DQATTACHED(mp, ip))
301		return false;
302	if (xfs_is_quota_inode(&mp->m_sb, ip->i_ino))
303		return false;
304	return true;
305}
306
307/*
308 * Given a locked inode, attach dquot(s) to it, taking U/G/P-QUOTAON
309 * into account.
310 * If @doalloc is true, the dquot(s) will be allocated if needed.
311 * Inode may get unlocked and relocked in here, and the caller must deal with
312 * the consequences.
313 */
314int
315xfs_qm_dqattach_locked(
316	xfs_inode_t	*ip,
317	bool		doalloc)
318{
319	xfs_mount_t	*mp = ip->i_mount;
320	int		error = 0;
321
322	if (!xfs_qm_need_dqattach(ip))
323		return 0;
324
325	ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL));
326
327	if (XFS_IS_UQUOTA_ON(mp) && !ip->i_udquot) {
328		error = xfs_qm_dqattach_one(ip, XFS_DQTYPE_USER,
329				doalloc, &ip->i_udquot);
330		if (error)
331			goto done;
332		ASSERT(ip->i_udquot);
333	}
334
335	if (XFS_IS_GQUOTA_ON(mp) && !ip->i_gdquot) {
336		error = xfs_qm_dqattach_one(ip, XFS_DQTYPE_GROUP,
337				doalloc, &ip->i_gdquot);
338		if (error)
339			goto done;
340		ASSERT(ip->i_gdquot);
341	}
342
343	if (XFS_IS_PQUOTA_ON(mp) && !ip->i_pdquot) {
344		error = xfs_qm_dqattach_one(ip, XFS_DQTYPE_PROJ,
345				doalloc, &ip->i_pdquot);
346		if (error)
347			goto done;
348		ASSERT(ip->i_pdquot);
349	}
350
351done:
352	/*
353	 * Don't worry about the dquots that we may have attached before any
354	 * error - they'll get detached later if it has not already been done.
355	 */
356	ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL));
357	return error;
358}
359
360int
361xfs_qm_dqattach(
362	struct xfs_inode	*ip)
363{
364	int			error;
365
366	if (!xfs_qm_need_dqattach(ip))
367		return 0;
368
369	xfs_ilock(ip, XFS_ILOCK_EXCL);
370	error = xfs_qm_dqattach_locked(ip, false);
371	xfs_iunlock(ip, XFS_ILOCK_EXCL);
372
373	return error;
374}
375
376/*
377 * Release dquots (and their references) if any.
378 * The inode should be locked EXCL except when this's called by
379 * xfs_ireclaim.
380 */
381void
382xfs_qm_dqdetach(
383	xfs_inode_t	*ip)
384{
385	if (!(ip->i_udquot || ip->i_gdquot || ip->i_pdquot))
386		return;
387
388	trace_xfs_dquot_dqdetach(ip);
389
390	ASSERT(!xfs_is_quota_inode(&ip->i_mount->m_sb, ip->i_ino));
391	if (ip->i_udquot) {
392		xfs_qm_dqrele(ip->i_udquot);
393		ip->i_udquot = NULL;
394	}
395	if (ip->i_gdquot) {
396		xfs_qm_dqrele(ip->i_gdquot);
397		ip->i_gdquot = NULL;
398	}
399	if (ip->i_pdquot) {
400		xfs_qm_dqrele(ip->i_pdquot);
401		ip->i_pdquot = NULL;
402	}
403}
404
405struct xfs_qm_isolate {
406	struct list_head	buffers;
407	struct list_head	dispose;
408};
409
410static enum lru_status
411xfs_qm_dquot_isolate(
412	struct list_head	*item,
413	struct list_lru_one	*lru,
414	spinlock_t		*lru_lock,
415	void			*arg)
416		__releases(lru_lock) __acquires(lru_lock)
417{
418	struct xfs_dquot	*dqp = container_of(item,
419						struct xfs_dquot, q_lru);
420	struct xfs_qm_isolate	*isol = arg;
421
422	if (!xfs_dqlock_nowait(dqp))
423		goto out_miss_busy;
424
425	/*
426	 * If something else is freeing this dquot and hasn't yet removed it
427	 * from the LRU, leave it for the freeing task to complete the freeing
428	 * process rather than risk it being free from under us here.
429	 */
430	if (dqp->q_flags & XFS_DQFLAG_FREEING)
431		goto out_miss_unlock;
432
433	/*
434	 * This dquot has acquired a reference in the meantime remove it from
435	 * the freelist and try again.
436	 */
437	if (dqp->q_nrefs) {
438		xfs_dqunlock(dqp);
439		XFS_STATS_INC(dqp->q_mount, xs_qm_dqwants);
440
441		trace_xfs_dqreclaim_want(dqp);
442		list_lru_isolate(lru, &dqp->q_lru);
443		XFS_STATS_DEC(dqp->q_mount, xs_qm_dquot_unused);
444		return LRU_REMOVED;
445	}
446
447	/*
448	 * If the dquot is dirty, flush it. If it's already being flushed, just
449	 * skip it so there is time for the IO to complete before we try to
450	 * reclaim it again on the next LRU pass.
451	 */
452	if (!xfs_dqflock_nowait(dqp))
453		goto out_miss_unlock;
454
455	if (XFS_DQ_IS_DIRTY(dqp)) {
456		struct xfs_buf	*bp = NULL;
457		int		error;
458
459		trace_xfs_dqreclaim_dirty(dqp);
460
461		/* we have to drop the LRU lock to flush the dquot */
462		spin_unlock(lru_lock);
463
464		error = xfs_qm_dqflush(dqp, &bp);
465		if (error)
466			goto out_unlock_dirty;
467
468		xfs_buf_delwri_queue(bp, &isol->buffers);
469		xfs_buf_relse(bp);
470		goto out_unlock_dirty;
471	}
472	xfs_dqfunlock(dqp);
473
474	/*
475	 * Prevent lookups now that we are past the point of no return.
476	 */
477	dqp->q_flags |= XFS_DQFLAG_FREEING;
478	xfs_dqunlock(dqp);
479
480	ASSERT(dqp->q_nrefs == 0);
481	list_lru_isolate_move(lru, &dqp->q_lru, &isol->dispose);
482	XFS_STATS_DEC(dqp->q_mount, xs_qm_dquot_unused);
483	trace_xfs_dqreclaim_done(dqp);
484	XFS_STATS_INC(dqp->q_mount, xs_qm_dqreclaims);
485	return LRU_REMOVED;
486
487out_miss_unlock:
488	xfs_dqunlock(dqp);
489out_miss_busy:
490	trace_xfs_dqreclaim_busy(dqp);
491	XFS_STATS_INC(dqp->q_mount, xs_qm_dqreclaim_misses);
492	return LRU_SKIP;
493
494out_unlock_dirty:
495	trace_xfs_dqreclaim_busy(dqp);
496	XFS_STATS_INC(dqp->q_mount, xs_qm_dqreclaim_misses);
497	xfs_dqunlock(dqp);
498	spin_lock(lru_lock);
499	return LRU_RETRY;
500}
501
502static unsigned long
503xfs_qm_shrink_scan(
504	struct shrinker		*shrink,
505	struct shrink_control	*sc)
506{
507	struct xfs_quotainfo	*qi = container_of(shrink,
508					struct xfs_quotainfo, qi_shrinker);
509	struct xfs_qm_isolate	isol;
510	unsigned long		freed;
511	int			error;
512
513	if ((sc->gfp_mask & (__GFP_FS|__GFP_DIRECT_RECLAIM)) != (__GFP_FS|__GFP_DIRECT_RECLAIM))
514		return 0;
515
516	INIT_LIST_HEAD(&isol.buffers);
517	INIT_LIST_HEAD(&isol.dispose);
518
519	freed = list_lru_shrink_walk(&qi->qi_lru, sc,
520				     xfs_qm_dquot_isolate, &isol);
521
522	error = xfs_buf_delwri_submit(&isol.buffers);
523	if (error)
524		xfs_warn(NULL, "%s: dquot reclaim failed", __func__);
525
526	while (!list_empty(&isol.dispose)) {
527		struct xfs_dquot	*dqp;
528
529		dqp = list_first_entry(&isol.dispose, struct xfs_dquot, q_lru);
530		list_del_init(&dqp->q_lru);
531		xfs_qm_dqfree_one(dqp);
532	}
533
534	return freed;
535}
536
537static unsigned long
538xfs_qm_shrink_count(
539	struct shrinker		*shrink,
540	struct shrink_control	*sc)
541{
542	struct xfs_quotainfo	*qi = container_of(shrink,
543					struct xfs_quotainfo, qi_shrinker);
544
545	return list_lru_shrink_count(&qi->qi_lru, sc);
546}
547
548STATIC void
549xfs_qm_set_defquota(
550	struct xfs_mount	*mp,
551	xfs_dqtype_t		type,
552	struct xfs_quotainfo	*qinf)
553{
554	struct xfs_dquot	*dqp;
555	struct xfs_def_quota	*defq;
556	int			error;
557
558	error = xfs_qm_dqget_uncached(mp, 0, type, &dqp);
559	if (error)
560		return;
561
562	defq = xfs_get_defquota(qinf, xfs_dquot_type(dqp));
563
564	/*
565	 * Timers and warnings have been already set, let's just set the
566	 * default limits for this quota type
567	 */
568	defq->blk.hard = dqp->q_blk.hardlimit;
569	defq->blk.soft = dqp->q_blk.softlimit;
570	defq->ino.hard = dqp->q_ino.hardlimit;
571	defq->ino.soft = dqp->q_ino.softlimit;
572	defq->rtb.hard = dqp->q_rtb.hardlimit;
573	defq->rtb.soft = dqp->q_rtb.softlimit;
574	xfs_qm_dqdestroy(dqp);
575}
576
577/* Initialize quota time limits from the root dquot. */
578static void
579xfs_qm_init_timelimits(
580	struct xfs_mount	*mp,
581	xfs_dqtype_t		type)
582{
583	struct xfs_quotainfo	*qinf = mp->m_quotainfo;
584	struct xfs_def_quota	*defq;
585	struct xfs_dquot	*dqp;
586	int			error;
587
588	defq = xfs_get_defquota(qinf, type);
589
590	defq->blk.time = XFS_QM_BTIMELIMIT;
591	defq->ino.time = XFS_QM_ITIMELIMIT;
592	defq->rtb.time = XFS_QM_RTBTIMELIMIT;
593
594	/*
595	 * We try to get the limits from the superuser's limits fields.
596	 * This is quite hacky, but it is standard quota practice.
597	 *
598	 * Since we may not have done a quotacheck by this point, just read
599	 * the dquot without attaching it to any hashtables or lists.
600	 */
601	error = xfs_qm_dqget_uncached(mp, 0, type, &dqp);
602	if (error)
603		return;
604
605	/*
606	 * The warnings and timers set the grace period given to
607	 * a user or group before he or she can not perform any
608	 * more writing. If it is zero, a default is used.
609	 */
610	if (dqp->q_blk.timer)
611		defq->blk.time = dqp->q_blk.timer;
612	if (dqp->q_ino.timer)
613		defq->ino.time = dqp->q_ino.timer;
614	if (dqp->q_rtb.timer)
615		defq->rtb.time = dqp->q_rtb.timer;
616
617	xfs_qm_dqdestroy(dqp);
618}
619
620/*
621 * This initializes all the quota information that's kept in the
622 * mount structure
623 */
624STATIC int
625xfs_qm_init_quotainfo(
626	struct xfs_mount	*mp)
627{
628	struct xfs_quotainfo	*qinf;
629	int			error;
630
631	ASSERT(XFS_IS_QUOTA_ON(mp));
632
633	qinf = mp->m_quotainfo = kmem_zalloc(sizeof(struct xfs_quotainfo), 0);
634
635	error = list_lru_init(&qinf->qi_lru);
636	if (error)
637		goto out_free_qinf;
638
639	/*
640	 * See if quotainodes are setup, and if not, allocate them,
641	 * and change the superblock accordingly.
642	 */
643	error = xfs_qm_init_quotainos(mp);
644	if (error)
645		goto out_free_lru;
646
647	INIT_RADIX_TREE(&qinf->qi_uquota_tree, GFP_NOFS);
648	INIT_RADIX_TREE(&qinf->qi_gquota_tree, GFP_NOFS);
649	INIT_RADIX_TREE(&qinf->qi_pquota_tree, GFP_NOFS);
650	mutex_init(&qinf->qi_tree_lock);
651
652	/* mutex used to serialize quotaoffs */
653	mutex_init(&qinf->qi_quotaofflock);
654
655	/* Precalc some constants */
656	qinf->qi_dqchunklen = XFS_FSB_TO_BB(mp, XFS_DQUOT_CLUSTER_SIZE_FSB);
657	qinf->qi_dqperchunk = xfs_calc_dquots_per_chunk(qinf->qi_dqchunklen);
658	if (xfs_has_bigtime(mp)) {
659		qinf->qi_expiry_min =
660			xfs_dq_bigtime_to_unix(XFS_DQ_BIGTIME_EXPIRY_MIN);
661		qinf->qi_expiry_max =
662			xfs_dq_bigtime_to_unix(XFS_DQ_BIGTIME_EXPIRY_MAX);
663	} else {
664		qinf->qi_expiry_min = XFS_DQ_LEGACY_EXPIRY_MIN;
665		qinf->qi_expiry_max = XFS_DQ_LEGACY_EXPIRY_MAX;
666	}
667	trace_xfs_quota_expiry_range(mp, qinf->qi_expiry_min,
668			qinf->qi_expiry_max);
669
670	mp->m_qflags |= (mp->m_sb.sb_qflags & XFS_ALL_QUOTA_CHKD);
671
672	xfs_qm_init_timelimits(mp, XFS_DQTYPE_USER);
673	xfs_qm_init_timelimits(mp, XFS_DQTYPE_GROUP);
674	xfs_qm_init_timelimits(mp, XFS_DQTYPE_PROJ);
675
676	if (XFS_IS_UQUOTA_ON(mp))
677		xfs_qm_set_defquota(mp, XFS_DQTYPE_USER, qinf);
678	if (XFS_IS_GQUOTA_ON(mp))
679		xfs_qm_set_defquota(mp, XFS_DQTYPE_GROUP, qinf);
680	if (XFS_IS_PQUOTA_ON(mp))
681		xfs_qm_set_defquota(mp, XFS_DQTYPE_PROJ, qinf);
682
683	qinf->qi_shrinker.count_objects = xfs_qm_shrink_count;
684	qinf->qi_shrinker.scan_objects = xfs_qm_shrink_scan;
685	qinf->qi_shrinker.seeks = DEFAULT_SEEKS;
686	qinf->qi_shrinker.flags = SHRINKER_NUMA_AWARE;
687
688	error = register_shrinker(&qinf->qi_shrinker, "xfs-qm:%s",
689				  mp->m_super->s_id);
690	if (error)
691		goto out_free_inos;
692
693	return 0;
694
695out_free_inos:
696	mutex_destroy(&qinf->qi_quotaofflock);
697	mutex_destroy(&qinf->qi_tree_lock);
698	xfs_qm_destroy_quotainos(qinf);
699out_free_lru:
700	list_lru_destroy(&qinf->qi_lru);
701out_free_qinf:
702	kmem_free(qinf);
703	mp->m_quotainfo = NULL;
704	return error;
705}
706
707/*
708 * Gets called when unmounting a filesystem or when all quotas get
709 * turned off.
710 * This purges the quota inodes, destroys locks and frees itself.
711 */
712void
713xfs_qm_destroy_quotainfo(
714	struct xfs_mount	*mp)
715{
716	struct xfs_quotainfo	*qi;
717
718	qi = mp->m_quotainfo;
719	ASSERT(qi != NULL);
720
721	unregister_shrinker(&qi->qi_shrinker);
722	list_lru_destroy(&qi->qi_lru);
723	xfs_qm_destroy_quotainos(qi);
724	mutex_destroy(&qi->qi_tree_lock);
725	mutex_destroy(&qi->qi_quotaofflock);
726	kmem_free(qi);
727	mp->m_quotainfo = NULL;
728}
729
730/*
731 * Create an inode and return with a reference already taken, but unlocked
732 * This is how we create quota inodes
733 */
734STATIC int
735xfs_qm_qino_alloc(
736	struct xfs_mount	*mp,
737	struct xfs_inode	**ipp,
738	unsigned int		flags)
739{
740	struct xfs_trans	*tp;
741	int			error;
742	bool			need_alloc = true;
743
744	*ipp = NULL;
745	/*
746	 * With superblock that doesn't have separate pquotino, we
747	 * share an inode between gquota and pquota. If the on-disk
748	 * superblock has GQUOTA and the filesystem is now mounted
749	 * with PQUOTA, just use sb_gquotino for sb_pquotino and
750	 * vice-versa.
751	 */
752	if (!xfs_has_pquotino(mp) &&
753			(flags & (XFS_QMOPT_PQUOTA|XFS_QMOPT_GQUOTA))) {
754		xfs_ino_t ino = NULLFSINO;
755
756		if ((flags & XFS_QMOPT_PQUOTA) &&
757			     (mp->m_sb.sb_gquotino != NULLFSINO)) {
758			ino = mp->m_sb.sb_gquotino;
759			if (XFS_IS_CORRUPT(mp,
760					   mp->m_sb.sb_pquotino != NULLFSINO))
761				return -EFSCORRUPTED;
762		} else if ((flags & XFS_QMOPT_GQUOTA) &&
763			     (mp->m_sb.sb_pquotino != NULLFSINO)) {
764			ino = mp->m_sb.sb_pquotino;
765			if (XFS_IS_CORRUPT(mp,
766					   mp->m_sb.sb_gquotino != NULLFSINO))
767				return -EFSCORRUPTED;
768		}
769		if (ino != NULLFSINO) {
770			error = xfs_iget(mp, NULL, ino, 0, 0, ipp);
771			if (error)
772				return error;
773			mp->m_sb.sb_gquotino = NULLFSINO;
774			mp->m_sb.sb_pquotino = NULLFSINO;
775			need_alloc = false;
776		}
777	}
778
779	error = xfs_trans_alloc(mp, &M_RES(mp)->tr_create,
780			need_alloc ? XFS_QM_QINOCREATE_SPACE_RES(mp) : 0,
781			0, 0, &tp);
782	if (error)
783		return error;
784
785	if (need_alloc) {
786		xfs_ino_t	ino;
787
788		error = xfs_dialloc(&tp, 0, S_IFREG, &ino);
789		if (!error)
790			error = xfs_init_new_inode(&nop_mnt_idmap, tp, NULL, ino,
791					S_IFREG, 1, 0, 0, false, ipp);
792		if (error) {
793			xfs_trans_cancel(tp);
794			return error;
795		}
796	}
797
798	/*
799	 * Make the changes in the superblock, and log those too.
800	 * sbfields arg may contain fields other than *QUOTINO;
801	 * VERSIONNUM for example.
802	 */
803	spin_lock(&mp->m_sb_lock);
804	if (flags & XFS_QMOPT_SBVERSION) {
805		ASSERT(!xfs_has_quota(mp));
806
807		xfs_add_quota(mp);
808		mp->m_sb.sb_uquotino = NULLFSINO;
809		mp->m_sb.sb_gquotino = NULLFSINO;
810		mp->m_sb.sb_pquotino = NULLFSINO;
811
812		/* qflags will get updated fully _after_ quotacheck */
813		mp->m_sb.sb_qflags = mp->m_qflags & XFS_ALL_QUOTA_ACCT;
814	}
815	if (flags & XFS_QMOPT_UQUOTA)
816		mp->m_sb.sb_uquotino = (*ipp)->i_ino;
817	else if (flags & XFS_QMOPT_GQUOTA)
818		mp->m_sb.sb_gquotino = (*ipp)->i_ino;
819	else
820		mp->m_sb.sb_pquotino = (*ipp)->i_ino;
821	spin_unlock(&mp->m_sb_lock);
822	xfs_log_sb(tp);
823
824	error = xfs_trans_commit(tp);
825	if (error) {
826		ASSERT(xfs_is_shutdown(mp));
827		xfs_alert(mp, "%s failed (error %d)!", __func__, error);
828	}
829	if (need_alloc)
830		xfs_finish_inode_setup(*ipp);
831	return error;
832}
833
834
835STATIC void
836xfs_qm_reset_dqcounts(
837	struct xfs_mount	*mp,
838	struct xfs_buf		*bp,
839	xfs_dqid_t		id,
840	xfs_dqtype_t		type)
841{
842	struct xfs_dqblk	*dqb;
843	int			j;
844
845	trace_xfs_reset_dqcounts(bp, _RET_IP_);
846
847	/*
848	 * Reset all counters and timers. They'll be
849	 * started afresh by xfs_qm_quotacheck.
850	 */
851#ifdef DEBUG
852	j = (int)XFS_FSB_TO_B(mp, XFS_DQUOT_CLUSTER_SIZE_FSB) /
853		sizeof(struct xfs_dqblk);
854	ASSERT(mp->m_quotainfo->qi_dqperchunk == j);
855#endif
856	dqb = bp->b_addr;
857	for (j = 0; j < mp->m_quotainfo->qi_dqperchunk; j++) {
858		struct xfs_disk_dquot	*ddq;
859
860		ddq = (struct xfs_disk_dquot *)&dqb[j];
861
862		/*
863		 * Do a sanity check, and if needed, repair the dqblk. Don't
864		 * output any warnings because it's perfectly possible to
865		 * find uninitialised dquot blks. See comment in
866		 * xfs_dquot_verify.
867		 */
868		if (xfs_dqblk_verify(mp, &dqb[j], id + j) ||
869		    (dqb[j].dd_diskdq.d_type & XFS_DQTYPE_REC_MASK) != type)
870			xfs_dqblk_repair(mp, &dqb[j], id + j, type);
871
872		/*
873		 * Reset type in case we are reusing group quota file for
874		 * project quotas or vice versa
875		 */
876		ddq->d_type = type;
877		ddq->d_bcount = 0;
878		ddq->d_icount = 0;
879		ddq->d_rtbcount = 0;
880
881		/*
882		 * dquot id 0 stores the default grace period and the maximum
883		 * warning limit that were set by the administrator, so we
884		 * should not reset them.
885		 */
886		if (ddq->d_id != 0) {
887			ddq->d_btimer = 0;
888			ddq->d_itimer = 0;
889			ddq->d_rtbtimer = 0;
890			ddq->d_bwarns = 0;
891			ddq->d_iwarns = 0;
892			ddq->d_rtbwarns = 0;
893			if (xfs_has_bigtime(mp))
894				ddq->d_type |= XFS_DQTYPE_BIGTIME;
895		}
896
897		if (xfs_has_crc(mp)) {
898			xfs_update_cksum((char *)&dqb[j],
899					 sizeof(struct xfs_dqblk),
900					 XFS_DQUOT_CRC_OFF);
901		}
902	}
903}
904
905STATIC int
906xfs_qm_reset_dqcounts_all(
907	struct xfs_mount	*mp,
908	xfs_dqid_t		firstid,
909	xfs_fsblock_t		bno,
910	xfs_filblks_t		blkcnt,
911	xfs_dqtype_t		type,
912	struct list_head	*buffer_list)
913{
914	struct xfs_buf		*bp;
915	int			error = 0;
916
917	ASSERT(blkcnt > 0);
918
919	/*
920	 * Blkcnt arg can be a very big number, and might even be
921	 * larger than the log itself. So, we have to break it up into
922	 * manageable-sized transactions.
923	 * Note that we don't start a permanent transaction here; we might
924	 * not be able to get a log reservation for the whole thing up front,
925	 * and we don't really care to either, because we just discard
926	 * everything if we were to crash in the middle of this loop.
927	 */
928	while (blkcnt--) {
929		error = xfs_trans_read_buf(mp, NULL, mp->m_ddev_targp,
930			      XFS_FSB_TO_DADDR(mp, bno),
931			      mp->m_quotainfo->qi_dqchunklen, 0, &bp,
932			      &xfs_dquot_buf_ops);
933
934		/*
935		 * CRC and validation errors will return a EFSCORRUPTED here. If
936		 * this occurs, re-read without CRC validation so that we can
937		 * repair the damage via xfs_qm_reset_dqcounts(). This process
938		 * will leave a trace in the log indicating corruption has
939		 * been detected.
940		 */
941		if (error == -EFSCORRUPTED) {
942			error = xfs_trans_read_buf(mp, NULL, mp->m_ddev_targp,
943				      XFS_FSB_TO_DADDR(mp, bno),
944				      mp->m_quotainfo->qi_dqchunklen, 0, &bp,
945				      NULL);
946		}
947
948		if (error)
949			break;
950
951		/*
952		 * A corrupt buffer might not have a verifier attached, so
953		 * make sure we have the correct one attached before writeback
954		 * occurs.
955		 */
956		bp->b_ops = &xfs_dquot_buf_ops;
957		xfs_qm_reset_dqcounts(mp, bp, firstid, type);
958		xfs_buf_delwri_queue(bp, buffer_list);
959		xfs_buf_relse(bp);
960
961		/* goto the next block. */
962		bno++;
963		firstid += mp->m_quotainfo->qi_dqperchunk;
964	}
965
966	return error;
967}
968
969/*
970 * Iterate over all allocated dquot blocks in this quota inode, zeroing all
971 * counters for every chunk of dquots that we find.
972 */
973STATIC int
974xfs_qm_reset_dqcounts_buf(
975	struct xfs_mount	*mp,
976	struct xfs_inode	*qip,
977	xfs_dqtype_t		type,
978	struct list_head	*buffer_list)
979{
980	struct xfs_bmbt_irec	*map;
981	int			i, nmaps;	/* number of map entries */
982	int			error;		/* return value */
983	xfs_fileoff_t		lblkno;
984	xfs_filblks_t		maxlblkcnt;
985	xfs_dqid_t		firstid;
986	xfs_fsblock_t		rablkno;
987	xfs_filblks_t		rablkcnt;
988
989	error = 0;
990	/*
991	 * This looks racy, but we can't keep an inode lock across a
992	 * trans_reserve. But, this gets called during quotacheck, and that
993	 * happens only at mount time which is single threaded.
994	 */
995	if (qip->i_nblocks == 0)
996		return 0;
997
998	map = kmem_alloc(XFS_DQITER_MAP_SIZE * sizeof(*map), 0);
999
1000	lblkno = 0;
1001	maxlblkcnt = XFS_B_TO_FSB(mp, mp->m_super->s_maxbytes);
1002	do {
1003		uint		lock_mode;
1004
1005		nmaps = XFS_DQITER_MAP_SIZE;
1006		/*
1007		 * We aren't changing the inode itself. Just changing
1008		 * some of its data. No new blocks are added here, and
1009		 * the inode is never added to the transaction.
1010		 */
1011		lock_mode = xfs_ilock_data_map_shared(qip);
1012		error = xfs_bmapi_read(qip, lblkno, maxlblkcnt - lblkno,
1013				       map, &nmaps, 0);
1014		xfs_iunlock(qip, lock_mode);
1015		if (error)
1016			break;
1017
1018		ASSERT(nmaps <= XFS_DQITER_MAP_SIZE);
1019		for (i = 0; i < nmaps; i++) {
1020			ASSERT(map[i].br_startblock != DELAYSTARTBLOCK);
1021			ASSERT(map[i].br_blockcount);
1022
1023
1024			lblkno += map[i].br_blockcount;
1025
1026			if (map[i].br_startblock == HOLESTARTBLOCK)
1027				continue;
1028
1029			firstid = (xfs_dqid_t) map[i].br_startoff *
1030				mp->m_quotainfo->qi_dqperchunk;
1031			/*
1032			 * Do a read-ahead on the next extent.
1033			 */
1034			if ((i+1 < nmaps) &&
1035			    (map[i+1].br_startblock != HOLESTARTBLOCK)) {
1036				rablkcnt =  map[i+1].br_blockcount;
1037				rablkno = map[i+1].br_startblock;
1038				while (rablkcnt--) {
1039					xfs_buf_readahead(mp->m_ddev_targp,
1040					       XFS_FSB_TO_DADDR(mp, rablkno),
1041					       mp->m_quotainfo->qi_dqchunklen,
1042					       &xfs_dquot_buf_ops);
1043					rablkno++;
1044				}
1045			}
1046			/*
1047			 * Iterate thru all the blks in the extent and
1048			 * reset the counters of all the dquots inside them.
1049			 */
1050			error = xfs_qm_reset_dqcounts_all(mp, firstid,
1051						   map[i].br_startblock,
1052						   map[i].br_blockcount,
1053						   type, buffer_list);
1054			if (error)
1055				goto out;
1056		}
1057	} while (nmaps > 0);
1058
1059out:
1060	kmem_free(map);
1061	return error;
1062}
1063
1064/*
1065 * Called by dqusage_adjust in doing a quotacheck.
1066 *
1067 * Given the inode, and a dquot id this updates both the incore dqout as well
1068 * as the buffer copy. This is so that once the quotacheck is done, we can
1069 * just log all the buffers, as opposed to logging numerous updates to
1070 * individual dquots.
1071 */
1072STATIC int
1073xfs_qm_quotacheck_dqadjust(
1074	struct xfs_inode	*ip,
1075	xfs_dqtype_t		type,
1076	xfs_qcnt_t		nblks,
1077	xfs_qcnt_t		rtblks)
1078{
1079	struct xfs_mount	*mp = ip->i_mount;
1080	struct xfs_dquot	*dqp;
1081	xfs_dqid_t		id;
1082	int			error;
1083
1084	id = xfs_qm_id_for_quotatype(ip, type);
1085	error = xfs_qm_dqget(mp, id, type, true, &dqp);
1086	if (error) {
1087		/*
1088		 * Shouldn't be able to turn off quotas here.
1089		 */
1090		ASSERT(error != -ESRCH);
1091		ASSERT(error != -ENOENT);
1092		return error;
1093	}
1094
1095	trace_xfs_dqadjust(dqp);
1096
1097	/*
1098	 * Adjust the inode count and the block count to reflect this inode's
1099	 * resource usage.
1100	 */
1101	dqp->q_ino.count++;
1102	dqp->q_ino.reserved++;
1103	if (nblks) {
1104		dqp->q_blk.count += nblks;
1105		dqp->q_blk.reserved += nblks;
1106	}
1107	if (rtblks) {
1108		dqp->q_rtb.count += rtblks;
1109		dqp->q_rtb.reserved += rtblks;
1110	}
1111
1112	/*
1113	 * Set default limits, adjust timers (since we changed usages)
1114	 *
1115	 * There are no timers for the default values set in the root dquot.
1116	 */
1117	if (dqp->q_id) {
1118		xfs_qm_adjust_dqlimits(dqp);
1119		xfs_qm_adjust_dqtimers(dqp);
1120	}
1121
1122	dqp->q_flags |= XFS_DQFLAG_DIRTY;
1123	xfs_qm_dqput(dqp);
1124	return 0;
1125}
1126
1127/*
1128 * callback routine supplied to bulkstat(). Given an inumber, find its
1129 * dquots and update them to account for resources taken by that inode.
1130 */
1131/* ARGSUSED */
1132STATIC int
1133xfs_qm_dqusage_adjust(
1134	struct xfs_mount	*mp,
1135	struct xfs_trans	*tp,
1136	xfs_ino_t		ino,
1137	void			*data)
1138{
1139	struct xfs_inode	*ip;
1140	xfs_qcnt_t		nblks;
1141	xfs_filblks_t		rtblks = 0;	/* total rt blks */
1142	int			error;
1143
1144	ASSERT(XFS_IS_QUOTA_ON(mp));
1145
1146	/*
1147	 * rootino must have its resources accounted for, not so with the quota
1148	 * inodes.
1149	 */
1150	if (xfs_is_quota_inode(&mp->m_sb, ino))
1151		return 0;
1152
1153	/*
1154	 * We don't _need_ to take the ilock EXCL here because quotacheck runs
1155	 * at mount time and therefore nobody will be racing chown/chproj.
1156	 */
1157	error = xfs_iget(mp, tp, ino, XFS_IGET_DONTCACHE, 0, &ip);
1158	if (error == -EINVAL || error == -ENOENT)
1159		return 0;
1160	if (error)
1161		return error;
1162
1163	/*
1164	 * Reload the incore unlinked list to avoid failure in inodegc.
1165	 * Use an unlocked check here because unrecovered unlinked inodes
1166	 * should be somewhat rare.
1167	 */
1168	if (xfs_inode_unlinked_incomplete(ip)) {
1169		error = xfs_inode_reload_unlinked(ip);
1170		if (error) {
1171			xfs_force_shutdown(mp, SHUTDOWN_CORRUPT_INCORE);
1172			goto error0;
1173		}
1174	}
1175
1176	ASSERT(ip->i_delayed_blks == 0);
1177
1178	if (XFS_IS_REALTIME_INODE(ip)) {
1179		struct xfs_ifork	*ifp = xfs_ifork_ptr(ip, XFS_DATA_FORK);
1180
1181		error = xfs_iread_extents(tp, ip, XFS_DATA_FORK);
1182		if (error)
1183			goto error0;
1184
1185		xfs_bmap_count_leaves(ifp, &rtblks);
1186	}
1187
1188	nblks = (xfs_qcnt_t)ip->i_nblocks - rtblks;
1189	xfs_iflags_clear(ip, XFS_IQUOTAUNCHECKED);
1190
1191	/*
1192	 * Add the (disk blocks and inode) resources occupied by this
1193	 * inode to its dquots. We do this adjustment in the incore dquot,
1194	 * and also copy the changes to its buffer.
1195	 * We don't care about putting these changes in a transaction
1196	 * envelope because if we crash in the middle of a 'quotacheck'
1197	 * we have to start from the beginning anyway.
1198	 * Once we're done, we'll log all the dquot bufs.
1199	 *
1200	 * The *QUOTA_ON checks below may look pretty racy, but quotachecks
1201	 * and quotaoffs don't race. (Quotachecks happen at mount time only).
1202	 */
1203	if (XFS_IS_UQUOTA_ON(mp)) {
1204		error = xfs_qm_quotacheck_dqadjust(ip, XFS_DQTYPE_USER, nblks,
1205				rtblks);
1206		if (error)
1207			goto error0;
1208	}
1209
1210	if (XFS_IS_GQUOTA_ON(mp)) {
1211		error = xfs_qm_quotacheck_dqadjust(ip, XFS_DQTYPE_GROUP, nblks,
1212				rtblks);
1213		if (error)
1214			goto error0;
1215	}
1216
1217	if (XFS_IS_PQUOTA_ON(mp)) {
1218		error = xfs_qm_quotacheck_dqadjust(ip, XFS_DQTYPE_PROJ, nblks,
1219				rtblks);
1220		if (error)
1221			goto error0;
1222	}
1223
1224error0:
1225	xfs_irele(ip);
1226	return error;
1227}
1228
1229STATIC int
1230xfs_qm_flush_one(
1231	struct xfs_dquot	*dqp,
1232	void			*data)
1233{
1234	struct xfs_mount	*mp = dqp->q_mount;
1235	struct list_head	*buffer_list = data;
1236	struct xfs_buf		*bp = NULL;
1237	int			error = 0;
1238
1239	xfs_dqlock(dqp);
1240	if (dqp->q_flags & XFS_DQFLAG_FREEING)
1241		goto out_unlock;
1242	if (!XFS_DQ_IS_DIRTY(dqp))
1243		goto out_unlock;
1244
1245	/*
1246	 * The only way the dquot is already flush locked by the time quotacheck
1247	 * gets here is if reclaim flushed it before the dqadjust walk dirtied
1248	 * it for the final time. Quotacheck collects all dquot bufs in the
1249	 * local delwri queue before dquots are dirtied, so reclaim can't have
1250	 * possibly queued it for I/O. The only way out is to push the buffer to
1251	 * cycle the flush lock.
1252	 */
1253	if (!xfs_dqflock_nowait(dqp)) {
1254		/* buf is pinned in-core by delwri list */
1255		error = xfs_buf_incore(mp->m_ddev_targp, dqp->q_blkno,
1256				mp->m_quotainfo->qi_dqchunklen, 0, &bp);
1257		if (error)
1258			goto out_unlock;
1259
1260		if (!(bp->b_flags & _XBF_DELWRI_Q)) {
1261			error = -EAGAIN;
1262			xfs_buf_relse(bp);
1263			goto out_unlock;
1264		}
1265		xfs_buf_unlock(bp);
1266
1267		xfs_buf_delwri_pushbuf(bp, buffer_list);
1268		xfs_buf_rele(bp);
1269
1270		error = -EAGAIN;
1271		goto out_unlock;
1272	}
1273
1274	error = xfs_qm_dqflush(dqp, &bp);
1275	if (error)
1276		goto out_unlock;
1277
1278	xfs_buf_delwri_queue(bp, buffer_list);
1279	xfs_buf_relse(bp);
1280out_unlock:
1281	xfs_dqunlock(dqp);
1282	return error;
1283}
1284
1285/*
1286 * Walk thru all the filesystem inodes and construct a consistent view
1287 * of the disk quota world. If the quotacheck fails, disable quotas.
1288 */
1289STATIC int
1290xfs_qm_quotacheck(
1291	xfs_mount_t	*mp)
1292{
1293	int			error, error2;
1294	uint			flags;
1295	LIST_HEAD		(buffer_list);
1296	struct xfs_inode	*uip = mp->m_quotainfo->qi_uquotaip;
1297	struct xfs_inode	*gip = mp->m_quotainfo->qi_gquotaip;
1298	struct xfs_inode	*pip = mp->m_quotainfo->qi_pquotaip;
1299
1300	flags = 0;
1301
1302	ASSERT(uip || gip || pip);
1303	ASSERT(XFS_IS_QUOTA_ON(mp));
1304
1305	xfs_notice(mp, "Quotacheck needed: Please wait.");
1306
1307	/*
1308	 * First we go thru all the dquots on disk, USR and GRP/PRJ, and reset
1309	 * their counters to zero. We need a clean slate.
1310	 * We don't log our changes till later.
1311	 */
1312	if (uip) {
1313		error = xfs_qm_reset_dqcounts_buf(mp, uip, XFS_DQTYPE_USER,
1314					 &buffer_list);
1315		if (error)
1316			goto error_return;
1317		flags |= XFS_UQUOTA_CHKD;
1318	}
1319
1320	if (gip) {
1321		error = xfs_qm_reset_dqcounts_buf(mp, gip, XFS_DQTYPE_GROUP,
1322					 &buffer_list);
1323		if (error)
1324			goto error_return;
1325		flags |= XFS_GQUOTA_CHKD;
1326	}
1327
1328	if (pip) {
1329		error = xfs_qm_reset_dqcounts_buf(mp, pip, XFS_DQTYPE_PROJ,
1330					 &buffer_list);
1331		if (error)
1332			goto error_return;
1333		flags |= XFS_PQUOTA_CHKD;
1334	}
1335
1336	xfs_set_quotacheck_running(mp);
1337	error = xfs_iwalk_threaded(mp, 0, 0, xfs_qm_dqusage_adjust, 0, true,
1338			NULL);
1339	xfs_clear_quotacheck_running(mp);
1340
1341	/*
1342	 * On error, the inode walk may have partially populated the dquot
1343	 * caches.  We must purge them before disabling quota and tearing down
1344	 * the quotainfo, or else the dquots will leak.
1345	 */
1346	if (error)
1347		goto error_purge;
1348
1349	/*
1350	 * We've made all the changes that we need to make incore.  Flush them
1351	 * down to disk buffers if everything was updated successfully.
1352	 */
1353	if (XFS_IS_UQUOTA_ON(mp)) {
1354		error = xfs_qm_dquot_walk(mp, XFS_DQTYPE_USER, xfs_qm_flush_one,
1355					  &buffer_list);
1356	}
1357	if (XFS_IS_GQUOTA_ON(mp)) {
1358		error2 = xfs_qm_dquot_walk(mp, XFS_DQTYPE_GROUP, xfs_qm_flush_one,
1359					   &buffer_list);
1360		if (!error)
1361			error = error2;
1362	}
1363	if (XFS_IS_PQUOTA_ON(mp)) {
1364		error2 = xfs_qm_dquot_walk(mp, XFS_DQTYPE_PROJ, xfs_qm_flush_one,
1365					   &buffer_list);
1366		if (!error)
1367			error = error2;
1368	}
1369
1370	error2 = xfs_buf_delwri_submit(&buffer_list);
1371	if (!error)
1372		error = error2;
1373
1374	/*
1375	 * We can get this error if we couldn't do a dquot allocation inside
1376	 * xfs_qm_dqusage_adjust (via bulkstat). We don't care about the
1377	 * dirty dquots that might be cached, we just want to get rid of them
1378	 * and turn quotaoff. The dquots won't be attached to any of the inodes
1379	 * at this point (because we intentionally didn't in dqget_noattach).
1380	 */
1381	if (error)
1382		goto error_purge;
1383
1384	/*
1385	 * If one type of quotas is off, then it will lose its
1386	 * quotachecked status, since we won't be doing accounting for
1387	 * that type anymore.
1388	 */
1389	mp->m_qflags &= ~XFS_ALL_QUOTA_CHKD;
1390	mp->m_qflags |= flags;
1391
1392error_return:
1393	xfs_buf_delwri_cancel(&buffer_list);
1394
1395	if (error) {
1396		xfs_warn(mp,
1397	"Quotacheck: Unsuccessful (Error %d): Disabling quotas.",
1398			error);
1399		/*
1400		 * We must turn off quotas.
1401		 */
1402		ASSERT(mp->m_quotainfo != NULL);
1403		xfs_qm_destroy_quotainfo(mp);
1404		if (xfs_mount_reset_sbqflags(mp)) {
1405			xfs_warn(mp,
1406				"Quotacheck: Failed to reset quota flags.");
1407		}
1408	} else
1409		xfs_notice(mp, "Quotacheck: Done.");
1410	return error;
1411
1412error_purge:
1413	/*
1414	 * On error, we may have inodes queued for inactivation. This may try
1415	 * to attach dquots to the inode before running cleanup operations on
1416	 * the inode and this can race with the xfs_qm_destroy_quotainfo() call
1417	 * below that frees mp->m_quotainfo. To avoid this race, flush all the
1418	 * pending inodegc operations before we purge the dquots from memory,
1419	 * ensuring that background inactivation is idle whilst we turn off
1420	 * quotas.
1421	 */
1422	xfs_inodegc_flush(mp);
1423	xfs_qm_dqpurge_all(mp);
1424	goto error_return;
1425
1426}
1427
1428/*
1429 * This is called from xfs_mountfs to start quotas and initialize all
1430 * necessary data structures like quotainfo.  This is also responsible for
1431 * running a quotacheck as necessary.  We are guaranteed that the superblock
1432 * is consistently read in at this point.
1433 *
1434 * If we fail here, the mount will continue with quota turned off. We don't
1435 * need to inidicate success or failure at all.
1436 */
1437void
1438xfs_qm_mount_quotas(
1439	struct xfs_mount	*mp)
1440{
1441	int			error = 0;
1442	uint			sbf;
1443
1444	/*
1445	 * If quotas on realtime volumes is not supported, we disable
1446	 * quotas immediately.
1447	 */
1448	if (mp->m_sb.sb_rextents) {
1449		xfs_notice(mp, "Cannot turn on quotas for realtime filesystem");
1450		mp->m_qflags = 0;
1451		goto write_changes;
1452	}
1453
1454	ASSERT(XFS_IS_QUOTA_ON(mp));
1455
1456	/*
1457	 * Allocate the quotainfo structure inside the mount struct, and
1458	 * create quotainode(s), and change/rev superblock if necessary.
1459	 */
1460	error = xfs_qm_init_quotainfo(mp);
1461	if (error) {
1462		/*
1463		 * We must turn off quotas.
1464		 */
1465		ASSERT(mp->m_quotainfo == NULL);
1466		mp->m_qflags = 0;
1467		goto write_changes;
1468	}
1469	/*
1470	 * If any of the quotas are not consistent, do a quotacheck.
1471	 */
1472	if (XFS_QM_NEED_QUOTACHECK(mp)) {
1473		error = xfs_qm_quotacheck(mp);
1474		if (error) {
1475			/* Quotacheck failed and disabled quotas. */
1476			return;
1477		}
1478	}
1479	/*
1480	 * If one type of quotas is off, then it will lose its
1481	 * quotachecked status, since we won't be doing accounting for
1482	 * that type anymore.
1483	 */
1484	if (!XFS_IS_UQUOTA_ON(mp))
1485		mp->m_qflags &= ~XFS_UQUOTA_CHKD;
1486	if (!XFS_IS_GQUOTA_ON(mp))
1487		mp->m_qflags &= ~XFS_GQUOTA_CHKD;
1488	if (!XFS_IS_PQUOTA_ON(mp))
1489		mp->m_qflags &= ~XFS_PQUOTA_CHKD;
1490
1491 write_changes:
1492	/*
1493	 * We actually don't have to acquire the m_sb_lock at all.
1494	 * This can only be called from mount, and that's single threaded. XXX
1495	 */
1496	spin_lock(&mp->m_sb_lock);
1497	sbf = mp->m_sb.sb_qflags;
1498	mp->m_sb.sb_qflags = mp->m_qflags & XFS_MOUNT_QUOTA_ALL;
1499	spin_unlock(&mp->m_sb_lock);
1500
1501	if (sbf != (mp->m_qflags & XFS_MOUNT_QUOTA_ALL)) {
1502		if (xfs_sync_sb(mp, false)) {
1503			/*
1504			 * We could only have been turning quotas off.
1505			 * We aren't in very good shape actually because
1506			 * the incore structures are convinced that quotas are
1507			 * off, but the on disk superblock doesn't know that !
1508			 */
1509			ASSERT(!(XFS_IS_QUOTA_ON(mp)));
1510			xfs_alert(mp, "%s: Superblock update failed!",
1511				__func__);
1512		}
1513	}
1514
1515	if (error) {
1516		xfs_warn(mp, "Failed to initialize disk quotas.");
1517		return;
1518	}
1519}
1520
1521/*
1522 * This is called after the superblock has been read in and we're ready to
1523 * iget the quota inodes.
1524 */
1525STATIC int
1526xfs_qm_init_quotainos(
1527	xfs_mount_t	*mp)
1528{
1529	struct xfs_inode	*uip = NULL;
1530	struct xfs_inode	*gip = NULL;
1531	struct xfs_inode	*pip = NULL;
1532	int			error;
1533	uint			flags = 0;
1534
1535	ASSERT(mp->m_quotainfo);
1536
1537	/*
1538	 * Get the uquota and gquota inodes
1539	 */
1540	if (xfs_has_quota(mp)) {
1541		if (XFS_IS_UQUOTA_ON(mp) &&
1542		    mp->m_sb.sb_uquotino != NULLFSINO) {
1543			ASSERT(mp->m_sb.sb_uquotino > 0);
1544			error = xfs_iget(mp, NULL, mp->m_sb.sb_uquotino,
1545					     0, 0, &uip);
1546			if (error)
1547				return error;
1548		}
1549		if (XFS_IS_GQUOTA_ON(mp) &&
1550		    mp->m_sb.sb_gquotino != NULLFSINO) {
1551			ASSERT(mp->m_sb.sb_gquotino > 0);
1552			error = xfs_iget(mp, NULL, mp->m_sb.sb_gquotino,
1553					     0, 0, &gip);
1554			if (error)
1555				goto error_rele;
1556		}
1557		if (XFS_IS_PQUOTA_ON(mp) &&
1558		    mp->m_sb.sb_pquotino != NULLFSINO) {
1559			ASSERT(mp->m_sb.sb_pquotino > 0);
1560			error = xfs_iget(mp, NULL, mp->m_sb.sb_pquotino,
1561					     0, 0, &pip);
1562			if (error)
1563				goto error_rele;
1564		}
1565	} else {
1566		flags |= XFS_QMOPT_SBVERSION;
1567	}
1568
1569	/*
1570	 * Create the three inodes, if they don't exist already. The changes
1571	 * made above will get added to a transaction and logged in one of
1572	 * the qino_alloc calls below.  If the device is readonly,
1573	 * temporarily switch to read-write to do this.
1574	 */
1575	if (XFS_IS_UQUOTA_ON(mp) && uip == NULL) {
1576		error = xfs_qm_qino_alloc(mp, &uip,
1577					      flags | XFS_QMOPT_UQUOTA);
1578		if (error)
1579			goto error_rele;
1580
1581		flags &= ~XFS_QMOPT_SBVERSION;
1582	}
1583	if (XFS_IS_GQUOTA_ON(mp) && gip == NULL) {
1584		error = xfs_qm_qino_alloc(mp, &gip,
1585					  flags | XFS_QMOPT_GQUOTA);
1586		if (error)
1587			goto error_rele;
1588
1589		flags &= ~XFS_QMOPT_SBVERSION;
1590	}
1591	if (XFS_IS_PQUOTA_ON(mp) && pip == NULL) {
1592		error = xfs_qm_qino_alloc(mp, &pip,
1593					  flags | XFS_QMOPT_PQUOTA);
1594		if (error)
1595			goto error_rele;
1596	}
1597
1598	mp->m_quotainfo->qi_uquotaip = uip;
1599	mp->m_quotainfo->qi_gquotaip = gip;
1600	mp->m_quotainfo->qi_pquotaip = pip;
1601
1602	return 0;
1603
1604error_rele:
1605	if (uip)
1606		xfs_irele(uip);
1607	if (gip)
1608		xfs_irele(gip);
1609	if (pip)
1610		xfs_irele(pip);
1611	return error;
1612}
1613
1614STATIC void
1615xfs_qm_destroy_quotainos(
1616	struct xfs_quotainfo	*qi)
1617{
1618	if (qi->qi_uquotaip) {
1619		xfs_irele(qi->qi_uquotaip);
1620		qi->qi_uquotaip = NULL; /* paranoia */
1621	}
1622	if (qi->qi_gquotaip) {
1623		xfs_irele(qi->qi_gquotaip);
1624		qi->qi_gquotaip = NULL;
1625	}
1626	if (qi->qi_pquotaip) {
1627		xfs_irele(qi->qi_pquotaip);
1628		qi->qi_pquotaip = NULL;
1629	}
1630}
1631
1632STATIC void
1633xfs_qm_dqfree_one(
1634	struct xfs_dquot	*dqp)
1635{
1636	struct xfs_mount	*mp = dqp->q_mount;
1637	struct xfs_quotainfo	*qi = mp->m_quotainfo;
1638
1639	mutex_lock(&qi->qi_tree_lock);
1640	radix_tree_delete(xfs_dquot_tree(qi, xfs_dquot_type(dqp)), dqp->q_id);
1641
1642	qi->qi_dquots--;
1643	mutex_unlock(&qi->qi_tree_lock);
1644
1645	xfs_qm_dqdestroy(dqp);
1646}
1647
1648/* --------------- utility functions for vnodeops ---------------- */
1649
1650
1651/*
1652 * Given an inode, a uid, gid and prid make sure that we have
1653 * allocated relevant dquot(s) on disk, and that we won't exceed inode
1654 * quotas by creating this file.
1655 * This also attaches dquot(s) to the given inode after locking it,
1656 * and returns the dquots corresponding to the uid and/or gid.
1657 *
1658 * in	: inode (unlocked)
1659 * out	: udquot, gdquot with references taken and unlocked
1660 */
1661int
1662xfs_qm_vop_dqalloc(
1663	struct xfs_inode	*ip,
1664	kuid_t			uid,
1665	kgid_t			gid,
1666	prid_t			prid,
1667	uint			flags,
1668	struct xfs_dquot	**O_udqpp,
1669	struct xfs_dquot	**O_gdqpp,
1670	struct xfs_dquot	**O_pdqpp)
1671{
1672	struct xfs_mount	*mp = ip->i_mount;
1673	struct inode		*inode = VFS_I(ip);
1674	struct user_namespace	*user_ns = inode->i_sb->s_user_ns;
1675	struct xfs_dquot	*uq = NULL;
1676	struct xfs_dquot	*gq = NULL;
1677	struct xfs_dquot	*pq = NULL;
1678	int			error;
1679	uint			lockflags;
1680
1681	if (!XFS_IS_QUOTA_ON(mp))
1682		return 0;
1683
1684	lockflags = XFS_ILOCK_EXCL;
1685	xfs_ilock(ip, lockflags);
1686
1687	if ((flags & XFS_QMOPT_INHERIT) && XFS_INHERIT_GID(ip))
1688		gid = inode->i_gid;
1689
1690	/*
1691	 * Attach the dquot(s) to this inode, doing a dquot allocation
1692	 * if necessary. The dquot(s) will not be locked.
1693	 */
1694	if (XFS_NOT_DQATTACHED(mp, ip)) {
1695		error = xfs_qm_dqattach_locked(ip, true);
1696		if (error) {
1697			xfs_iunlock(ip, lockflags);
1698			return error;
1699		}
1700	}
1701
1702	if ((flags & XFS_QMOPT_UQUOTA) && XFS_IS_UQUOTA_ON(mp)) {
1703		ASSERT(O_udqpp);
1704		if (!uid_eq(inode->i_uid, uid)) {
1705			/*
1706			 * What we need is the dquot that has this uid, and
1707			 * if we send the inode to dqget, the uid of the inode
1708			 * takes priority over what's sent in the uid argument.
1709			 * We must unlock inode here before calling dqget if
1710			 * we're not sending the inode, because otherwise
1711			 * we'll deadlock by doing trans_reserve while
1712			 * holding ilock.
1713			 */
1714			xfs_iunlock(ip, lockflags);
1715			error = xfs_qm_dqget(mp, from_kuid(user_ns, uid),
1716					XFS_DQTYPE_USER, true, &uq);
1717			if (error) {
1718				ASSERT(error != -ENOENT);
1719				return error;
1720			}
1721			/*
1722			 * Get the ilock in the right order.
1723			 */
1724			xfs_dqunlock(uq);
1725			lockflags = XFS_ILOCK_SHARED;
1726			xfs_ilock(ip, lockflags);
1727		} else {
1728			/*
1729			 * Take an extra reference, because we'll return
1730			 * this to caller
1731			 */
1732			ASSERT(ip->i_udquot);
1733			uq = xfs_qm_dqhold(ip->i_udquot);
1734		}
1735	}
1736	if ((flags & XFS_QMOPT_GQUOTA) && XFS_IS_GQUOTA_ON(mp)) {
1737		ASSERT(O_gdqpp);
1738		if (!gid_eq(inode->i_gid, gid)) {
1739			xfs_iunlock(ip, lockflags);
1740			error = xfs_qm_dqget(mp, from_kgid(user_ns, gid),
1741					XFS_DQTYPE_GROUP, true, &gq);
1742			if (error) {
1743				ASSERT(error != -ENOENT);
1744				goto error_rele;
1745			}
1746			xfs_dqunlock(gq);
1747			lockflags = XFS_ILOCK_SHARED;
1748			xfs_ilock(ip, lockflags);
1749		} else {
1750			ASSERT(ip->i_gdquot);
1751			gq = xfs_qm_dqhold(ip->i_gdquot);
1752		}
1753	}
1754	if ((flags & XFS_QMOPT_PQUOTA) && XFS_IS_PQUOTA_ON(mp)) {
1755		ASSERT(O_pdqpp);
1756		if (ip->i_projid != prid) {
1757			xfs_iunlock(ip, lockflags);
1758			error = xfs_qm_dqget(mp, prid,
1759					XFS_DQTYPE_PROJ, true, &pq);
1760			if (error) {
1761				ASSERT(error != -ENOENT);
1762				goto error_rele;
1763			}
1764			xfs_dqunlock(pq);
1765			lockflags = XFS_ILOCK_SHARED;
1766			xfs_ilock(ip, lockflags);
1767		} else {
1768			ASSERT(ip->i_pdquot);
1769			pq = xfs_qm_dqhold(ip->i_pdquot);
1770		}
1771	}
1772	trace_xfs_dquot_dqalloc(ip);
1773
1774	xfs_iunlock(ip, lockflags);
1775	if (O_udqpp)
1776		*O_udqpp = uq;
1777	else
1778		xfs_qm_dqrele(uq);
1779	if (O_gdqpp)
1780		*O_gdqpp = gq;
1781	else
1782		xfs_qm_dqrele(gq);
1783	if (O_pdqpp)
1784		*O_pdqpp = pq;
1785	else
1786		xfs_qm_dqrele(pq);
1787	return 0;
1788
1789error_rele:
1790	xfs_qm_dqrele(gq);
1791	xfs_qm_dqrele(uq);
1792	return error;
1793}
1794
1795/*
1796 * Actually transfer ownership, and do dquot modifications.
1797 * These were already reserved.
1798 */
1799struct xfs_dquot *
1800xfs_qm_vop_chown(
1801	struct xfs_trans	*tp,
1802	struct xfs_inode	*ip,
1803	struct xfs_dquot	**IO_olddq,
1804	struct xfs_dquot	*newdq)
1805{
1806	struct xfs_dquot	*prevdq;
1807	uint		bfield = XFS_IS_REALTIME_INODE(ip) ?
1808				 XFS_TRANS_DQ_RTBCOUNT : XFS_TRANS_DQ_BCOUNT;
1809
1810
1811	ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL));
1812	ASSERT(XFS_IS_QUOTA_ON(ip->i_mount));
1813
1814	/* old dquot */
1815	prevdq = *IO_olddq;
1816	ASSERT(prevdq);
1817	ASSERT(prevdq != newdq);
1818
1819	xfs_trans_mod_dquot(tp, prevdq, bfield, -(ip->i_nblocks));
1820	xfs_trans_mod_dquot(tp, prevdq, XFS_TRANS_DQ_ICOUNT, -1);
1821
1822	/* the sparkling new dquot */
1823	xfs_trans_mod_dquot(tp, newdq, bfield, ip->i_nblocks);
1824	xfs_trans_mod_dquot(tp, newdq, XFS_TRANS_DQ_ICOUNT, 1);
1825
1826	/*
1827	 * Back when we made quota reservations for the chown, we reserved the
1828	 * ondisk blocks + delalloc blocks with the new dquot.  Now that we've
1829	 * switched the dquots, decrease the new dquot's block reservation
1830	 * (having already bumped up the real counter) so that we don't have
1831	 * any reservation to give back when we commit.
1832	 */
1833	xfs_trans_mod_dquot(tp, newdq, XFS_TRANS_DQ_RES_BLKS,
1834			-ip->i_delayed_blks);
1835
1836	/*
1837	 * Give the incore reservation for delalloc blocks back to the old
1838	 * dquot.  We don't normally handle delalloc quota reservations
1839	 * transactionally, so just lock the dquot and subtract from the
1840	 * reservation.  Dirty the transaction because it's too late to turn
1841	 * back now.
1842	 */
1843	tp->t_flags |= XFS_TRANS_DIRTY;
1844	xfs_dqlock(prevdq);
1845	ASSERT(prevdq->q_blk.reserved >= ip->i_delayed_blks);
1846	prevdq->q_blk.reserved -= ip->i_delayed_blks;
1847	xfs_dqunlock(prevdq);
1848
1849	/*
1850	 * Take an extra reference, because the inode is going to keep
1851	 * this dquot pointer even after the trans_commit.
1852	 */
1853	*IO_olddq = xfs_qm_dqhold(newdq);
1854
1855	return prevdq;
1856}
1857
1858int
1859xfs_qm_vop_rename_dqattach(
1860	struct xfs_inode	**i_tab)
1861{
1862	struct xfs_mount	*mp = i_tab[0]->i_mount;
1863	int			i;
1864
1865	if (!XFS_IS_QUOTA_ON(mp))
1866		return 0;
1867
1868	for (i = 0; (i < 4 && i_tab[i]); i++) {
1869		struct xfs_inode	*ip = i_tab[i];
1870		int			error;
1871
1872		/*
1873		 * Watch out for duplicate entries in the table.
1874		 */
1875		if (i == 0 || ip != i_tab[i-1]) {
1876			if (XFS_NOT_DQATTACHED(mp, ip)) {
1877				error = xfs_qm_dqattach(ip);
1878				if (error)
1879					return error;
1880			}
1881		}
1882	}
1883	return 0;
1884}
1885
1886void
1887xfs_qm_vop_create_dqattach(
1888	struct xfs_trans	*tp,
1889	struct xfs_inode	*ip,
1890	struct xfs_dquot	*udqp,
1891	struct xfs_dquot	*gdqp,
1892	struct xfs_dquot	*pdqp)
1893{
1894	struct xfs_mount	*mp = tp->t_mountp;
1895
1896	if (!XFS_IS_QUOTA_ON(mp))
1897		return;
1898
1899	ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL));
1900
1901	if (udqp && XFS_IS_UQUOTA_ON(mp)) {
1902		ASSERT(ip->i_udquot == NULL);
1903		ASSERT(i_uid_read(VFS_I(ip)) == udqp->q_id);
1904
1905		ip->i_udquot = xfs_qm_dqhold(udqp);
1906		xfs_trans_mod_dquot(tp, udqp, XFS_TRANS_DQ_ICOUNT, 1);
1907	}
1908	if (gdqp && XFS_IS_GQUOTA_ON(mp)) {
1909		ASSERT(ip->i_gdquot == NULL);
1910		ASSERT(i_gid_read(VFS_I(ip)) == gdqp->q_id);
1911
1912		ip->i_gdquot = xfs_qm_dqhold(gdqp);
1913		xfs_trans_mod_dquot(tp, gdqp, XFS_TRANS_DQ_ICOUNT, 1);
1914	}
1915	if (pdqp && XFS_IS_PQUOTA_ON(mp)) {
1916		ASSERT(ip->i_pdquot == NULL);
1917		ASSERT(ip->i_projid == pdqp->q_id);
1918
1919		ip->i_pdquot = xfs_qm_dqhold(pdqp);
1920		xfs_trans_mod_dquot(tp, pdqp, XFS_TRANS_DQ_ICOUNT, 1);
1921	}
1922}
1923
1924/* Decide if this inode's dquot is near an enforcement boundary. */
1925bool
1926xfs_inode_near_dquot_enforcement(
1927	struct xfs_inode	*ip,
1928	xfs_dqtype_t		type)
1929{
1930	struct xfs_dquot	*dqp;
1931	int64_t			freesp;
1932
1933	/* We only care for quotas that are enabled and enforced. */
1934	dqp = xfs_inode_dquot(ip, type);
1935	if (!dqp || !xfs_dquot_is_enforced(dqp))
1936		return false;
1937
1938	if (xfs_dquot_res_over_limits(&dqp->q_ino) ||
1939	    xfs_dquot_res_over_limits(&dqp->q_rtb))
1940		return true;
1941
1942	/* For space on the data device, check the various thresholds. */
1943	if (!dqp->q_prealloc_hi_wmark)
1944		return false;
1945
1946	if (dqp->q_blk.reserved < dqp->q_prealloc_lo_wmark)
1947		return false;
1948
1949	if (dqp->q_blk.reserved >= dqp->q_prealloc_hi_wmark)
1950		return true;
1951
1952	freesp = dqp->q_prealloc_hi_wmark - dqp->q_blk.reserved;
1953	if (freesp < dqp->q_low_space[XFS_QLOWSP_5_PCNT])
1954		return true;
1955
1956	return false;
1957}
1958