// SPDX-License-Identifier: GPL-2.0
/*
 * Copyright (c) 2000-2005 Silicon Graphics, Inc.
 * All Rights Reserved.
 */
#include "xfs.h"
#include "xfs_fs.h"
#include "xfs_shared.h"
#include "xfs_format.h"
#include "xfs_log_format.h"
#include "xfs_trans_resv.h"
#include "xfs_bit.h"
#include "xfs_sb.h"
#include "xfs_mount.h"
#include "xfs_inode.h"
#include "xfs_iwalk.h"
#include "xfs_quota.h"
#include "xfs_bmap.h"
#include "xfs_bmap_util.h"
#include "xfs_trans.h"
#include "xfs_trans_space.h"
#include "xfs_qm.h"
#include "xfs_trace.h"
#include "xfs_icache.h"
#include "xfs_error.h"

/*
 * The global quota manager. There is only one of these for the entire
 * system, _not_ one per file system. XQM keeps track of the overall
 * quota functionality, including maintaining the freelist and hash
 * tables of dquots.
 */
STATIC int	xfs_qm_init_quotainos(struct xfs_mount *mp);
STATIC int	xfs_qm_init_quotainfo(struct xfs_mount *mp);

STATIC void	xfs_qm_destroy_quotainos(struct xfs_quotainfo *qi);
STATIC void	xfs_qm_dqfree_one(struct xfs_dquot *dqp);
/*
 * We use the batch lookup interface to iterate over the dquots as it
 * currently is the only interface into the radix tree code that allows
 * fuzzy lookups instead of exact matches.  Holding the lock over multiple
 * operations is fine as all callers run only during mount/umount or
 * quotaoff.
 */
#define XFS_DQ_LOOKUP_BATCH	32

STATIC int
xfs_qm_dquot_walk(
	struct xfs_mount	*mp,
	xfs_dqtype_t		type,
	int			(*execute)(struct xfs_dquot *dqp, void *data),
	void			*data)
{
	struct xfs_quotainfo	*qi = mp->m_quotainfo;
	struct radix_tree_root	*tree = xfs_dquot_tree(qi, type);
	uint32_t		next_index;
	int			last_error = 0;
	int			skipped;
	int			nr_found;

restart:
	skipped = 0;
	next_index = 0;
	nr_found = 0;

	while (1) {
		struct xfs_dquot *batch[XFS_DQ_LOOKUP_BATCH];
		int		error = 0;
		int		i;

		mutex_lock(&qi->qi_tree_lock);
		nr_found = radix_tree_gang_lookup(tree, (void **)batch,
					next_index, XFS_DQ_LOOKUP_BATCH);
		if (!nr_found) {
			mutex_unlock(&qi->qi_tree_lock);
			break;
		}

		for (i = 0; i < nr_found; i++) {
			struct xfs_dquot *dqp = batch[i];

			next_index = dqp->q_id + 1;

			error = execute(batch[i], data);
			if (error == -EAGAIN) {
				skipped++;
				continue;
			}
			if (error && last_error != -EFSCORRUPTED)
				last_error = error;
		}

		mutex_unlock(&qi->qi_tree_lock);

		/* bail out if the filesystem is corrupted.  */
		if (last_error == -EFSCORRUPTED) {
			skipped = 0;
			break;
		}
		/* we're done if id overflows back to zero */
		if (!next_index)
			break;
	}

	if (skipped) {
		delay(1);
		goto restart;
	}

	return last_error;
}
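
/*
 * Example (illustrative only, not part of this file): any function with
 * the execute() signature can be passed to xfs_qm_dquot_walk(); returning
 * -EAGAIN makes the walker skip the dquot for now and retry it on the next
 * restart pass.  A hypothetical callback that just counts dquots:
 *
 *	static int xfs_qm_count_one(struct xfs_dquot *dqp, void *data)
 *	{
 *		(*(int *)data)++;
 *		return 0;
 *	}
 *
 *	int count = 0;
 *	error = xfs_qm_dquot_walk(mp, XFS_DQTYPE_USER, xfs_qm_count_one,
 *			&count);
 */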

/*
 * Purge a dquot from all tracking data structures and free it.
 */
STATIC int
xfs_qm_dqpurge(
	struct xfs_dquot	*dqp,
	void			*data)
{
	struct xfs_mount	*mp = dqp->q_mount;
	struct xfs_quotainfo	*qi = mp->m_quotainfo;
	int			error = -EAGAIN;

	xfs_dqlock(dqp);
	if ((dqp->q_flags & XFS_DQFLAG_FREEING) || dqp->q_nrefs != 0)
		goto out_unlock;

	dqp->q_flags |= XFS_DQFLAG_FREEING;

	xfs_dqflock(dqp);

	/*
	 * If we are turning this type of quota off, we don't care
	 * about the dirty metadata sitting in this dquot. OTOH, if
	 * we're unmounting, we do care, so we flush it and wait.
	 */
	if (XFS_DQ_IS_DIRTY(dqp)) {
		struct xfs_buf	*bp = NULL;

		/*
		 * We don't care about getting disk errors here. We need
		 * to purge this dquot anyway, so we go ahead regardless.
		 */
		error = xfs_qm_dqflush(dqp, &bp);
		if (!error) {
			error = xfs_bwrite(bp);
			xfs_buf_relse(bp);
		} else if (error == -EAGAIN) {
			dqp->q_flags &= ~XFS_DQFLAG_FREEING;
			goto out_unlock;
		}
		xfs_dqflock(dqp);
	}

	ASSERT(atomic_read(&dqp->q_pincount) == 0);
	ASSERT(XFS_FORCED_SHUTDOWN(mp) ||
		!test_bit(XFS_LI_IN_AIL, &dqp->q_logitem.qli_item.li_flags));

	xfs_dqfunlock(dqp);
	xfs_dqunlock(dqp);

	radix_tree_delete(xfs_dquot_tree(qi, xfs_dquot_type(dqp)), dqp->q_id);
	qi->qi_dquots--;

	/*
	 * We move dquots to the freelist as soon as their reference count
	 * hits zero, so it really should be on the freelist here.
	 */
	ASSERT(!list_empty(&dqp->q_lru));
	list_lru_del(&qi->qi_lru, &dqp->q_lru);
	XFS_STATS_DEC(mp, xs_qm_dquot_unused);

	xfs_qm_dqdestroy(dqp);
	return 0;

out_unlock:
	xfs_dqunlock(dqp);
	return error;
}

/*
 * Purge the dquot cache.
 */
void
xfs_qm_dqpurge_all(
	struct xfs_mount	*mp,
	uint			flags)
{
	if (flags & XFS_QMOPT_UQUOTA)
		xfs_qm_dquot_walk(mp, XFS_DQTYPE_USER, xfs_qm_dqpurge, NULL);
	if (flags & XFS_QMOPT_GQUOTA)
		xfs_qm_dquot_walk(mp, XFS_DQTYPE_GROUP, xfs_qm_dqpurge, NULL);
	if (flags & XFS_QMOPT_PQUOTA)
		xfs_qm_dquot_walk(mp, XFS_DQTYPE_PROJ, xfs_qm_dqpurge, NULL);
}
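
/*
 * Usage note (illustrative): callers pass any combination of the
 * XFS_QMOPT_*QUOTA flags, e.g. to purge only the group dquots:
 *
 *	xfs_qm_dqpurge_all(mp, XFS_QMOPT_GQUOTA);
 *
 * xfs_qm_unmount() below passes XFS_QMOPT_QUOTALL to purge all three types.
 */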

/*
 * Just destroy the quotainfo structure.
 */
void
xfs_qm_unmount(
	struct xfs_mount	*mp)
{
	if (mp->m_quotainfo) {
		xfs_qm_dqpurge_all(mp, XFS_QMOPT_QUOTALL);
		xfs_qm_destroy_quotainfo(mp);
	}
}

/*
 * Called from the vfsops layer.
 */
void
xfs_qm_unmount_quotas(
	xfs_mount_t	*mp)
{
	/*
	 * Release the dquots that the root inode, et al, might be holding,
	 * before we flush quotas and blow away the quotainfo structure.
	 */
	ASSERT(mp->m_rootip);
	xfs_qm_dqdetach(mp->m_rootip);
	if (mp->m_rbmip)
		xfs_qm_dqdetach(mp->m_rbmip);
	if (mp->m_rsumip)
		xfs_qm_dqdetach(mp->m_rsumip);

	/*
	 * Release the quota inodes.
	 */
	if (mp->m_quotainfo) {
		if (mp->m_quotainfo->qi_uquotaip) {
			xfs_irele(mp->m_quotainfo->qi_uquotaip);
			mp->m_quotainfo->qi_uquotaip = NULL;
		}
		if (mp->m_quotainfo->qi_gquotaip) {
			xfs_irele(mp->m_quotainfo->qi_gquotaip);
			mp->m_quotainfo->qi_gquotaip = NULL;
		}
		if (mp->m_quotainfo->qi_pquotaip) {
			xfs_irele(mp->m_quotainfo->qi_pquotaip);
			mp->m_quotainfo->qi_pquotaip = NULL;
		}
	}
}

STATIC int
xfs_qm_dqattach_one(
	struct xfs_inode	*ip,
	xfs_dqtype_t		type,
	bool			doalloc,
	struct xfs_dquot	**IO_idqpp)
{
	struct xfs_dquot	*dqp;
	int			error;

	ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL));
	error = 0;

	/*
	 * See if we already have it in the inode itself. IO_idqpp is &i_udquot
	 * or &i_gdquot. This makes the code look weird, but makes the logic a
	 * lot simpler.
	 */
	dqp = *IO_idqpp;
	if (dqp) {
		trace_xfs_dqattach_found(dqp);
		return 0;
	}

	/*
	 * Find the dquot from somewhere. This bumps the reference count of
	 * the dquot and returns it locked.  This can return ENOENT if the
	 * dquot didn't exist on disk and we didn't ask it to allocate; ESRCH
	 * if quotas got turned off suddenly.
	 */
	error = xfs_qm_dqget_inode(ip, type, doalloc, &dqp);
	if (error)
		return error;

	trace_xfs_dqattach_get(dqp);

	/*
	 * dqget may have dropped and re-acquired the ilock, but it guarantees
	 * that the dquot returned is the one that should go in the inode.
	 */
	*IO_idqpp = dqp;
	xfs_dqunlock(dqp);
	return 0;
}

static bool
xfs_qm_need_dqattach(
	struct xfs_inode	*ip)
{
	struct xfs_mount	*mp = ip->i_mount;

	if (!XFS_IS_QUOTA_RUNNING(mp))
		return false;
	if (!XFS_IS_QUOTA_ON(mp))
		return false;
	if (!XFS_NOT_DQATTACHED(mp, ip))
		return false;
	if (xfs_is_quota_inode(&mp->m_sb, ip->i_ino))
		return false;
	return true;
}

/*
 * Given a locked inode, attach dquot(s) to it, taking U/G/P-QUOTAON
 * into account.
 * If @doalloc is true, the dquot(s) will be allocated if needed.
 * Inode may get unlocked and relocked in here, and the caller must deal with
 * the consequences.
 */
int
xfs_qm_dqattach_locked(
	xfs_inode_t	*ip,
	bool		doalloc)
{
	xfs_mount_t	*mp = ip->i_mount;
	int		error = 0;

	if (!xfs_qm_need_dqattach(ip))
		return 0;

	ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL));

	if (XFS_IS_UQUOTA_ON(mp) && !ip->i_udquot) {
		error = xfs_qm_dqattach_one(ip, XFS_DQTYPE_USER,
				doalloc, &ip->i_udquot);
		if (error)
			goto done;
		ASSERT(ip->i_udquot);
	}

	if (XFS_IS_GQUOTA_ON(mp) && !ip->i_gdquot) {
		error = xfs_qm_dqattach_one(ip, XFS_DQTYPE_GROUP,
				doalloc, &ip->i_gdquot);
		if (error)
			goto done;
		ASSERT(ip->i_gdquot);
	}

	if (XFS_IS_PQUOTA_ON(mp) && !ip->i_pdquot) {
		error = xfs_qm_dqattach_one(ip, XFS_DQTYPE_PROJ,
				doalloc, &ip->i_pdquot);
		if (error)
			goto done;
		ASSERT(ip->i_pdquot);
	}

done:
	/*
	 * Don't worry about the dquots that we may have attached before any
	 * error - they'll get detached later if it has not already been done.
	 */
	ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL));
	return error;
}

int
xfs_qm_dqattach(
	struct xfs_inode	*ip)
{
	int			error;

	if (!xfs_qm_need_dqattach(ip))
		return 0;

	xfs_ilock(ip, XFS_ILOCK_EXCL);
	error = xfs_qm_dqattach_locked(ip, false);
	xfs_iunlock(ip, XFS_ILOCK_EXCL);

	return error;
}
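
/*
 * Example (illustrative, hypothetical caller): a path that needs dquots
 * attached before making quota reservations would do:
 *
 *	error = xfs_qm_dqattach(ip);
 *	if (error)
 *		return error;
 *
 * Callers already holding the ilock exclusively use
 * xfs_qm_dqattach_locked() instead, and must cope with the lock being
 * cycled if a dquot has to be read in or allocated.
 */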

/*
 * Release dquots (and their references) if any.
 * The inode should be locked EXCL except when this is called by
 * xfs_ireclaim.
 */
void
xfs_qm_dqdetach(
	xfs_inode_t	*ip)
{
	if (!(ip->i_udquot || ip->i_gdquot || ip->i_pdquot))
		return;

	trace_xfs_dquot_dqdetach(ip);

	ASSERT(!xfs_is_quota_inode(&ip->i_mount->m_sb, ip->i_ino));
	if (ip->i_udquot) {
		xfs_qm_dqrele(ip->i_udquot);
		ip->i_udquot = NULL;
	}
	if (ip->i_gdquot) {
		xfs_qm_dqrele(ip->i_gdquot);
		ip->i_gdquot = NULL;
	}
	if (ip->i_pdquot) {
		xfs_qm_dqrele(ip->i_pdquot);
		ip->i_pdquot = NULL;
	}
}

struct xfs_qm_isolate {
	struct list_head	buffers;
	struct list_head	dispose;
};

static enum lru_status
xfs_qm_dquot_isolate(
	struct list_head	*item,
	struct list_lru_one	*lru,
	spinlock_t		*lru_lock,
	void			*arg)
		__releases(lru_lock) __acquires(lru_lock)
{
	struct xfs_dquot	*dqp = container_of(item,
						struct xfs_dquot, q_lru);
	struct xfs_qm_isolate	*isol = arg;

	if (!xfs_dqlock_nowait(dqp))
		goto out_miss_busy;

	/*
	 * This dquot has acquired a reference in the meantime; remove it from
	 * the freelist and try again.
	 */
	if (dqp->q_nrefs) {
		xfs_dqunlock(dqp);
		XFS_STATS_INC(dqp->q_mount, xs_qm_dqwants);

		trace_xfs_dqreclaim_want(dqp);
		list_lru_isolate(lru, &dqp->q_lru);
		XFS_STATS_DEC(dqp->q_mount, xs_qm_dquot_unused);
		return LRU_REMOVED;
	}

	/*
	 * If the dquot is dirty, flush it. If it's already being flushed, just
	 * skip it so there is time for the IO to complete before we try to
	 * reclaim it again on the next LRU pass.
	 */
	if (!xfs_dqflock_nowait(dqp)) {
		xfs_dqunlock(dqp);
		goto out_miss_busy;
	}

	if (XFS_DQ_IS_DIRTY(dqp)) {
		struct xfs_buf	*bp = NULL;
		int		error;

		trace_xfs_dqreclaim_dirty(dqp);

		/* we have to drop the LRU lock to flush the dquot */
		spin_unlock(lru_lock);

		error = xfs_qm_dqflush(dqp, &bp);
		if (error)
			goto out_unlock_dirty;

		xfs_buf_delwri_queue(bp, &isol->buffers);
		xfs_buf_relse(bp);
		goto out_unlock_dirty;
	}
	xfs_dqfunlock(dqp);

	/*
	 * Prevent lookups now that we are past the point of no return.
	 */
	dqp->q_flags |= XFS_DQFLAG_FREEING;
	xfs_dqunlock(dqp);

	ASSERT(dqp->q_nrefs == 0);
	list_lru_isolate_move(lru, &dqp->q_lru, &isol->dispose);
	XFS_STATS_DEC(dqp->q_mount, xs_qm_dquot_unused);
	trace_xfs_dqreclaim_done(dqp);
	XFS_STATS_INC(dqp->q_mount, xs_qm_dqreclaims);
	return LRU_REMOVED;

out_miss_busy:
	trace_xfs_dqreclaim_busy(dqp);
	XFS_STATS_INC(dqp->q_mount, xs_qm_dqreclaim_misses);
	return LRU_SKIP;

out_unlock_dirty:
	trace_xfs_dqreclaim_busy(dqp);
	XFS_STATS_INC(dqp->q_mount, xs_qm_dqreclaim_misses);
	xfs_dqunlock(dqp);
	spin_lock(lru_lock);
	return LRU_RETRY;
}

static unsigned long
xfs_qm_shrink_scan(
	struct shrinker		*shrink,
	struct shrink_control	*sc)
{
	struct xfs_quotainfo	*qi = container_of(shrink,
					struct xfs_quotainfo, qi_shrinker);
	struct xfs_qm_isolate	isol;
	unsigned long		freed;
	int			error;

	if ((sc->gfp_mask & (__GFP_FS|__GFP_DIRECT_RECLAIM)) != (__GFP_FS|__GFP_DIRECT_RECLAIM))
		return 0;

	INIT_LIST_HEAD(&isol.buffers);
	INIT_LIST_HEAD(&isol.dispose);

	freed = list_lru_shrink_walk(&qi->qi_lru, sc,
				     xfs_qm_dquot_isolate, &isol);

	error = xfs_buf_delwri_submit(&isol.buffers);
	if (error)
		xfs_warn(NULL, "%s: dquot reclaim failed", __func__);

	while (!list_empty(&isol.dispose)) {
		struct xfs_dquot	*dqp;

		dqp = list_first_entry(&isol.dispose, struct xfs_dquot, q_lru);
		list_del_init(&dqp->q_lru);
		xfs_qm_dqfree_one(dqp);
	}

	return freed;
}

static unsigned long
xfs_qm_shrink_count(
	struct shrinker		*shrink,
	struct shrink_control	*sc)
{
	struct xfs_quotainfo	*qi = container_of(shrink,
					struct xfs_quotainfo, qi_shrinker);

	return list_lru_shrink_count(&qi->qi_lru, sc);
}

STATIC void
xfs_qm_set_defquota(
	struct xfs_mount	*mp,
	xfs_dqtype_t		type,
	struct xfs_quotainfo	*qinf)
{
	struct xfs_dquot	*dqp;
	struct xfs_def_quota	*defq;
	int			error;

	error = xfs_qm_dqget_uncached(mp, 0, type, &dqp);
	if (error)
		return;

	defq = xfs_get_defquota(qinf, xfs_dquot_type(dqp));

	/*
	 * Timers and warnings have already been set, so let's just set the
	 * default limits for this quota type.
	 */
	defq->blk.hard = dqp->q_blk.hardlimit;
	defq->blk.soft = dqp->q_blk.softlimit;
	defq->ino.hard = dqp->q_ino.hardlimit;
	defq->ino.soft = dqp->q_ino.softlimit;
	defq->rtb.hard = dqp->q_rtb.hardlimit;
	defq->rtb.soft = dqp->q_rtb.softlimit;
	xfs_qm_dqdestroy(dqp);
}

/* Initialize quota time limits from the root dquot. */
static void
xfs_qm_init_timelimits(
	struct xfs_mount	*mp,
	xfs_dqtype_t		type)
{
	struct xfs_quotainfo	*qinf = mp->m_quotainfo;
	struct xfs_def_quota	*defq;
	struct xfs_dquot	*dqp;
	int			error;

	defq = xfs_get_defquota(qinf, type);

	defq->blk.time = XFS_QM_BTIMELIMIT;
	defq->ino.time = XFS_QM_ITIMELIMIT;
	defq->rtb.time = XFS_QM_RTBTIMELIMIT;
	defq->blk.warn = XFS_QM_BWARNLIMIT;
	defq->ino.warn = XFS_QM_IWARNLIMIT;
	defq->rtb.warn = XFS_QM_RTBWARNLIMIT;

	/*
	 * We try to get the limits from the superuser's limits fields.
	 * This is quite hacky, but it is standard quota practice.
	 *
	 * Since we may not have done a quotacheck by this point, just read
	 * the dquot without attaching it to any hashtables or lists.
	 */
	error = xfs_qm_dqget_uncached(mp, 0, type, &dqp);
	if (error)
		return;

	/*
	 * The warnings and timers set the grace period given to a user or
	 * group before they can no longer perform any writes. If a value
	 * is zero, the default is used.
	 */
	if (dqp->q_blk.timer)
		defq->blk.time = dqp->q_blk.timer;
	if (dqp->q_ino.timer)
		defq->ino.time = dqp->q_ino.timer;
	if (dqp->q_rtb.timer)
		defq->rtb.time = dqp->q_rtb.timer;
	if (dqp->q_blk.warnings)
		defq->blk.warn = dqp->q_blk.warnings;
	if (dqp->q_ino.warnings)
		defq->ino.warn = dqp->q_ino.warnings;
	if (dqp->q_rtb.warnings)
		defq->rtb.warn = dqp->q_rtb.warnings;

	xfs_qm_dqdestroy(dqp);
}
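
/*
 * Illustration (hypothetical admin action, command syntax from the
 * xfs_quota(8) userspace tool, not this file): if the administrator has
 * set a filesystem-wide block grace period, e.g. something like
 *
 *	xfs_quota -x -c 'timer -u -b 14days' /mountpoint
 *
 * then the id-0 user dquot carries a nonzero q_blk.timer, and that value
 * overrides the compiled-in XFS_QM_BTIMELIMIT default above.
 */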

/*
 * This initializes all the quota information that's kept in the
 * mount structure.
 */
STATIC int
xfs_qm_init_quotainfo(
	struct xfs_mount	*mp)
{
	struct xfs_quotainfo	*qinf;
	int			error;

	ASSERT(XFS_IS_QUOTA_RUNNING(mp));

	qinf = mp->m_quotainfo = kmem_zalloc(sizeof(struct xfs_quotainfo), 0);

	error = list_lru_init(&qinf->qi_lru);
	if (error)
		goto out_free_qinf;

	/*
	 * See if the quota inodes are set up, and if not, allocate them
	 * and change the superblock accordingly.
	 */
	error = xfs_qm_init_quotainos(mp);
	if (error)
		goto out_free_lru;

	INIT_RADIX_TREE(&qinf->qi_uquota_tree, GFP_NOFS);
	INIT_RADIX_TREE(&qinf->qi_gquota_tree, GFP_NOFS);
	INIT_RADIX_TREE(&qinf->qi_pquota_tree, GFP_NOFS);
	mutex_init(&qinf->qi_tree_lock);

	/* mutex used to serialize quotaoffs */
	mutex_init(&qinf->qi_quotaofflock);

	/* Precalc some constants */
	qinf->qi_dqchunklen = XFS_FSB_TO_BB(mp, XFS_DQUOT_CLUSTER_SIZE_FSB);
	qinf->qi_dqperchunk = xfs_calc_dquots_per_chunk(qinf->qi_dqchunklen);
	if (xfs_sb_version_hasbigtime(&mp->m_sb)) {
		qinf->qi_expiry_min =
			xfs_dq_bigtime_to_unix(XFS_DQ_BIGTIME_EXPIRY_MIN);
		qinf->qi_expiry_max =
			xfs_dq_bigtime_to_unix(XFS_DQ_BIGTIME_EXPIRY_MAX);
	} else {
		qinf->qi_expiry_min = XFS_DQ_LEGACY_EXPIRY_MIN;
		qinf->qi_expiry_max = XFS_DQ_LEGACY_EXPIRY_MAX;
	}
	trace_xfs_quota_expiry_range(mp, qinf->qi_expiry_min,
			qinf->qi_expiry_max);

	mp->m_qflags |= (mp->m_sb.sb_qflags & XFS_ALL_QUOTA_CHKD);

	xfs_qm_init_timelimits(mp, XFS_DQTYPE_USER);
	xfs_qm_init_timelimits(mp, XFS_DQTYPE_GROUP);
	xfs_qm_init_timelimits(mp, XFS_DQTYPE_PROJ);

	if (XFS_IS_UQUOTA_RUNNING(mp))
		xfs_qm_set_defquota(mp, XFS_DQTYPE_USER, qinf);
	if (XFS_IS_GQUOTA_RUNNING(mp))
		xfs_qm_set_defquota(mp, XFS_DQTYPE_GROUP, qinf);
	if (XFS_IS_PQUOTA_RUNNING(mp))
		xfs_qm_set_defquota(mp, XFS_DQTYPE_PROJ, qinf);

	qinf->qi_shrinker.count_objects = xfs_qm_shrink_count;
	qinf->qi_shrinker.scan_objects = xfs_qm_shrink_scan;
	qinf->qi_shrinker.seeks = DEFAULT_SEEKS;
	qinf->qi_shrinker.flags = SHRINKER_NUMA_AWARE;

	error = register_shrinker(&qinf->qi_shrinker);
	if (error)
		goto out_free_inos;

	return 0;

out_free_inos:
	mutex_destroy(&qinf->qi_quotaofflock);
	mutex_destroy(&qinf->qi_tree_lock);
	xfs_qm_destroy_quotainos(qinf);
out_free_lru:
	list_lru_destroy(&qinf->qi_lru);
out_free_qinf:
	kmem_free(qinf);
	mp->m_quotainfo = NULL;
	return error;
}

/*
 * Gets called when unmounting a filesystem or when all quotas get
 * turned off.
 * This purges the quota inodes, destroys locks and frees the quotainfo
 * structure.
 */
void
xfs_qm_destroy_quotainfo(
	struct xfs_mount	*mp)
{
	struct xfs_quotainfo	*qi;

	qi = mp->m_quotainfo;
	ASSERT(qi != NULL);

	unregister_shrinker(&qi->qi_shrinker);
	list_lru_destroy(&qi->qi_lru);
	xfs_qm_destroy_quotainos(qi);
	mutex_destroy(&qi->qi_tree_lock);
	mutex_destroy(&qi->qi_quotaofflock);
	kmem_free(qi);
	mp->m_quotainfo = NULL;
}

/*
 * Create an inode and return with a reference already taken, but unlocked.
 * This is how we create quota inodes.
 */
STATIC int
xfs_qm_qino_alloc(
	xfs_mount_t	*mp,
	xfs_inode_t	**ip,
	uint		flags)
{
	xfs_trans_t	*tp;
	int		error;
	bool		need_alloc = true;

	*ip = NULL;
	/*
	 * With a superblock that doesn't have a separate pquotino, we
	 * share an inode between gquota and pquota. If the on-disk
	 * superblock has GQUOTA and the filesystem is now mounted
	 * with PQUOTA, just use sb_gquotino for sb_pquotino and
	 * vice-versa.
	 */
	if (!xfs_sb_version_has_pquotino(&mp->m_sb) &&
			(flags & (XFS_QMOPT_PQUOTA|XFS_QMOPT_GQUOTA))) {
		xfs_ino_t ino = NULLFSINO;

		if ((flags & XFS_QMOPT_PQUOTA) &&
			     (mp->m_sb.sb_gquotino != NULLFSINO)) {
			ino = mp->m_sb.sb_gquotino;
			if (XFS_IS_CORRUPT(mp,
					   mp->m_sb.sb_pquotino != NULLFSINO))
				return -EFSCORRUPTED;
		} else if ((flags & XFS_QMOPT_GQUOTA) &&
			     (mp->m_sb.sb_pquotino != NULLFSINO)) {
			ino = mp->m_sb.sb_pquotino;
			if (XFS_IS_CORRUPT(mp,
					   mp->m_sb.sb_gquotino != NULLFSINO))
				return -EFSCORRUPTED;
		}
		if (ino != NULLFSINO) {
			error = xfs_iget(mp, NULL, ino, 0, 0, ip);
			if (error)
				return error;
			mp->m_sb.sb_gquotino = NULLFSINO;
			mp->m_sb.sb_pquotino = NULLFSINO;
			need_alloc = false;
		}
	}

	error = xfs_trans_alloc(mp, &M_RES(mp)->tr_create,
			need_alloc ? XFS_QM_QINOCREATE_SPACE_RES(mp) : 0,
			0, 0, &tp);
	if (error)
		return error;

	if (need_alloc) {
		error = xfs_dir_ialloc(&tp, NULL, S_IFREG, 1, 0, 0, ip);
		if (error) {
			xfs_trans_cancel(tp);
			return error;
		}
	}

	/*
	 * Make the changes in the superblock, and log those too.
	 * sbfields arg may contain fields other than *QUOTINO;
	 * VERSIONNUM for example.
	 */
	spin_lock(&mp->m_sb_lock);
	if (flags & XFS_QMOPT_SBVERSION) {
		ASSERT(!xfs_sb_version_hasquota(&mp->m_sb));

		xfs_sb_version_addquota(&mp->m_sb);
		mp->m_sb.sb_uquotino = NULLFSINO;
		mp->m_sb.sb_gquotino = NULLFSINO;
		mp->m_sb.sb_pquotino = NULLFSINO;

		/* qflags will get updated fully _after_ quotacheck */
		mp->m_sb.sb_qflags = mp->m_qflags & XFS_ALL_QUOTA_ACCT;
	}
	if (flags & XFS_QMOPT_UQUOTA)
		mp->m_sb.sb_uquotino = (*ip)->i_ino;
	else if (flags & XFS_QMOPT_GQUOTA)
		mp->m_sb.sb_gquotino = (*ip)->i_ino;
	else
		mp->m_sb.sb_pquotino = (*ip)->i_ino;
	spin_unlock(&mp->m_sb_lock);
	xfs_log_sb(tp);

	error = xfs_trans_commit(tp);
	if (error) {
		ASSERT(XFS_FORCED_SHUTDOWN(mp));
		xfs_alert(mp, "%s failed (error %d)!", __func__, error);
	}
	if (need_alloc)
		xfs_finish_inode_setup(*ip);
	return error;
}


STATIC void
xfs_qm_reset_dqcounts(
	struct xfs_mount	*mp,
	struct xfs_buf		*bp,
	xfs_dqid_t		id,
	xfs_dqtype_t		type)
{
	struct xfs_dqblk	*dqb;
	int			j;

	trace_xfs_reset_dqcounts(bp, _RET_IP_);

	/*
	 * Reset all counters and timers. They'll be
	 * started afresh by xfs_qm_quotacheck.
	 */
#ifdef DEBUG
	j = (int)XFS_FSB_TO_B(mp, XFS_DQUOT_CLUSTER_SIZE_FSB) /
		sizeof(xfs_dqblk_t);
	ASSERT(mp->m_quotainfo->qi_dqperchunk == j);
#endif
	dqb = bp->b_addr;
	for (j = 0; j < mp->m_quotainfo->qi_dqperchunk; j++) {
		struct xfs_disk_dquot	*ddq;

		ddq = (struct xfs_disk_dquot *)&dqb[j];

		/*
		 * Do a sanity check, and if needed, repair the dqblk. Don't
		 * output any warnings because it's perfectly possible to
		 * find uninitialised dquot blks. See comment in
		 * xfs_dquot_verify.
		 */
		if (xfs_dqblk_verify(mp, &dqb[j], id + j) ||
		    (dqb[j].dd_diskdq.d_type & XFS_DQTYPE_REC_MASK) != type)
			xfs_dqblk_repair(mp, &dqb[j], id + j, type);

		/*
		 * Reset type in case we are reusing group quota file for
		 * project quotas or vice versa
		 */
		ddq->d_type = type;
		ddq->d_bcount = 0;
		ddq->d_icount = 0;
		ddq->d_rtbcount = 0;

		/*
		 * dquot id 0 stores the default grace period and the maximum
		 * warning limit that were set by the administrator, so we
		 * should not reset them.
		 */
		if (ddq->d_id != 0) {
			ddq->d_btimer = 0;
			ddq->d_itimer = 0;
			ddq->d_rtbtimer = 0;
			ddq->d_bwarns = 0;
			ddq->d_iwarns = 0;
			ddq->d_rtbwarns = 0;
			if (xfs_sb_version_hasbigtime(&mp->m_sb))
				ddq->d_type |= XFS_DQTYPE_BIGTIME;
		}

		if (xfs_sb_version_hascrc(&mp->m_sb)) {
			xfs_update_cksum((char *)&dqb[j],
					 sizeof(struct xfs_dqblk),
					 XFS_DQUOT_CRC_OFF);
		}
	}
}

STATIC int
xfs_qm_reset_dqcounts_all(
	struct xfs_mount	*mp,
	xfs_dqid_t		firstid,
	xfs_fsblock_t		bno,
	xfs_filblks_t		blkcnt,
	xfs_dqtype_t		type,
	struct list_head	*buffer_list)
{
	struct xfs_buf		*bp;
	int			error = 0;

	ASSERT(blkcnt > 0);

	/*
	 * The blkcnt arg can be a very big number, and might even be
	 * larger than the log itself. So, we have to break it up into
	 * manageable-sized transactions.
	 * Note that we don't start a permanent transaction here; we might
	 * not be able to get a log reservation for the whole thing up front,
	 * and we don't really care to either, because we just discard
	 * everything if we were to crash in the middle of this loop.
	 */
	while (blkcnt--) {
		error = xfs_trans_read_buf(mp, NULL, mp->m_ddev_targp,
			      XFS_FSB_TO_DADDR(mp, bno),
			      mp->m_quotainfo->qi_dqchunklen, 0, &bp,
			      &xfs_dquot_buf_ops);

		/*
		 * CRC and validation errors will return an EFSCORRUPTED here.
		 * If this occurs, re-read without CRC validation so that we
		 * can repair the damage via xfs_qm_reset_dqcounts(). This
		 * process will leave a trace in the log indicating corruption
		 * has been detected.
		 */
		if (error == -EFSCORRUPTED) {
			error = xfs_trans_read_buf(mp, NULL, mp->m_ddev_targp,
				      XFS_FSB_TO_DADDR(mp, bno),
				      mp->m_quotainfo->qi_dqchunklen, 0, &bp,
				      NULL);
		}

		if (error)
			break;

		/*
		 * A corrupt buffer might not have a verifier attached, so
		 * make sure we have the correct one attached before writeback
		 * occurs.
		 */
		bp->b_ops = &xfs_dquot_buf_ops;
		xfs_qm_reset_dqcounts(mp, bp, firstid, type);
		xfs_buf_delwri_queue(bp, buffer_list);
		xfs_buf_relse(bp);

		/* go to the next block. */
		bno++;
		firstid += mp->m_quotainfo->qi_dqperchunk;
	}

	return error;
}

/*
 * Iterate over all allocated dquot blocks in this quota inode, zeroing all
 * counters for every chunk of dquots that we find.
 */
STATIC int
xfs_qm_reset_dqcounts_buf(
	struct xfs_mount	*mp,
	struct xfs_inode	*qip,
	xfs_dqtype_t		type,
	struct list_head	*buffer_list)
{
	struct xfs_bmbt_irec	*map;
	int			i, nmaps;	/* number of map entries */
	int			error;		/* return value */
	xfs_fileoff_t		lblkno;
	xfs_filblks_t		maxlblkcnt;
	xfs_dqid_t		firstid;
	xfs_fsblock_t		rablkno;
	xfs_filblks_t		rablkcnt;

	error = 0;
	/*
	 * This looks racy, but we can't keep an inode lock across a
	 * trans_reserve. But, this gets called during quotacheck, and that
	 * happens only at mount time, which is single threaded.
	 */
	if (qip->i_d.di_nblocks == 0)
		return 0;

	map = kmem_alloc(XFS_DQITER_MAP_SIZE * sizeof(*map), 0);

	lblkno = 0;
	maxlblkcnt = XFS_B_TO_FSB(mp, mp->m_super->s_maxbytes);
	do {
		uint		lock_mode;

		nmaps = XFS_DQITER_MAP_SIZE;
		/*
		 * We aren't changing the inode itself. Just changing
		 * some of its data. No new blocks are added here, and
		 * the inode is never added to the transaction.
		 */
		lock_mode = xfs_ilock_data_map_shared(qip);
		error = xfs_bmapi_read(qip, lblkno, maxlblkcnt - lblkno,
				       map, &nmaps, 0);
		xfs_iunlock(qip, lock_mode);
		if (error)
			break;

		ASSERT(nmaps <= XFS_DQITER_MAP_SIZE);
		for (i = 0; i < nmaps; i++) {
			ASSERT(map[i].br_startblock != DELAYSTARTBLOCK);
			ASSERT(map[i].br_blockcount);

			lblkno += map[i].br_blockcount;

			if (map[i].br_startblock == HOLESTARTBLOCK)
				continue;

			firstid = (xfs_dqid_t) map[i].br_startoff *
				mp->m_quotainfo->qi_dqperchunk;
			/*
			 * Do a read-ahead on the next extent.
			 */
			if ((i+1 < nmaps) &&
			    (map[i+1].br_startblock != HOLESTARTBLOCK)) {
				rablkcnt = map[i+1].br_blockcount;
				rablkno = map[i+1].br_startblock;
				while (rablkcnt--) {
					xfs_buf_readahead(mp->m_ddev_targp,
					       XFS_FSB_TO_DADDR(mp, rablkno),
					       mp->m_quotainfo->qi_dqchunklen,
					       &xfs_dquot_buf_ops);
					rablkno++;
				}
			}
			/*
			 * Iterate thru all the blks in the extent and
			 * reset the counters of all the dquots inside them.
			 */
			error = xfs_qm_reset_dqcounts_all(mp, firstid,
						   map[i].br_startblock,
						   map[i].br_blockcount,
						   type, buffer_list);
			if (error)
				goto out;
		}
	} while (nmaps > 0);

out:
	kmem_free(map);
	return error;
}

/*
 * Called by dqusage_adjust in doing a quotacheck.
 *
 * Given the inode and a dquot type, this updates the incore dquot as well
 * as the buffer copy. This is so that once the quotacheck is done, we can
 * just log all the buffers, as opposed to logging numerous updates to
 * individual dquots.
 */
STATIC int
xfs_qm_quotacheck_dqadjust(
	struct xfs_inode	*ip,
	xfs_dqtype_t		type,
	xfs_qcnt_t		nblks,
	xfs_qcnt_t		rtblks)
{
	struct xfs_mount	*mp = ip->i_mount;
	struct xfs_dquot	*dqp;
	xfs_dqid_t		id;
	int			error;

	id = xfs_qm_id_for_quotatype(ip, type);
	error = xfs_qm_dqget(mp, id, type, true, &dqp);
	if (error) {
		/*
		 * Shouldn't be able to turn off quotas here.
		 */
		ASSERT(error != -ESRCH);
		ASSERT(error != -ENOENT);
		return error;
	}

	trace_xfs_dqadjust(dqp);

	/*
	 * Adjust the inode count and the block count to reflect this inode's
	 * resource usage.
	 */
	dqp->q_ino.count++;
	dqp->q_ino.reserved++;
	if (nblks) {
		dqp->q_blk.count += nblks;
		dqp->q_blk.reserved += nblks;
	}
	if (rtblks) {
		dqp->q_rtb.count += rtblks;
		dqp->q_rtb.reserved += rtblks;
	}

	/*
	 * Set default limits, adjust timers (since we changed usages)
	 *
	 * There are no timers for the default values set in the root dquot.
	 */
	if (dqp->q_id) {
		xfs_qm_adjust_dqlimits(dqp);
		xfs_qm_adjust_dqtimers(dqp);
	}

	dqp->q_flags |= XFS_DQFLAG_DIRTY;
	xfs_qm_dqput(dqp);
	return 0;
}
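
/*
 * Illustration (behavior of xfs_qm_id_for_quotatype, which lives outside
 * this file): the dquot id adjusted above is derived from the inode's
 * ownership, roughly:
 *
 *	XFS_DQTYPE_USER		-> the inode's uid
 *	XFS_DQTYPE_GROUP	-> the inode's gid
 *	XFS_DQTYPE_PROJ		-> ip->i_d.di_projid
 */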

/*
 * Callback routine supplied to the inode walk. Given an inumber, find its
 * dquots and update them to account for resources taken by that inode.
 */
/* ARGSUSED */
STATIC int
xfs_qm_dqusage_adjust(
	struct xfs_mount	*mp,
	struct xfs_trans	*tp,
	xfs_ino_t		ino,
	void			*data)
{
	struct xfs_inode	*ip;
	xfs_qcnt_t		nblks;
	xfs_filblks_t		rtblks = 0;	/* total rt blks */
	int			error;

	ASSERT(XFS_IS_QUOTA_RUNNING(mp));

	/*
	 * rootino must have its resources accounted for, not so with the quota
	 * inodes.
	 */
	if (xfs_is_quota_inode(&mp->m_sb, ino))
		return 0;

	/*
	 * We don't _need_ to take the ilock EXCL here because quotacheck runs
	 * at mount time and therefore nobody will be racing chown/chproj.
	 */
	error = xfs_iget(mp, tp, ino, XFS_IGET_DONTCACHE, 0, &ip);
	if (error == -EINVAL || error == -ENOENT)
		return 0;
	if (error)
		return error;

	ASSERT(ip->i_delayed_blks == 0);

	if (XFS_IS_REALTIME_INODE(ip)) {
		struct xfs_ifork	*ifp = XFS_IFORK_PTR(ip, XFS_DATA_FORK);

		if (!(ifp->if_flags & XFS_IFEXTENTS)) {
			error = xfs_iread_extents(tp, ip, XFS_DATA_FORK);
			if (error)
				goto error0;
		}

		xfs_bmap_count_leaves(ifp, &rtblks);
	}

	nblks = (xfs_qcnt_t)ip->i_d.di_nblocks - rtblks;

	/*
	 * Add the (disk blocks and inode) resources occupied by this
	 * inode to its dquots. We do this adjustment in the incore dquot,
	 * and also copy the changes to its buffer.
	 * We don't care about putting these changes in a transaction
	 * envelope because if we crash in the middle of a 'quotacheck'
	 * we have to start from the beginning anyway.
	 * Once we're done, we'll log all the dquot bufs.
	 *
	 * The *QUOTA_ON checks below may look pretty racy, but quotachecks
	 * and quotaoffs don't race. (Quotachecks happen at mount time only).
	 */
	if (XFS_IS_UQUOTA_ON(mp)) {
		error = xfs_qm_quotacheck_dqadjust(ip, XFS_DQTYPE_USER, nblks,
				rtblks);
		if (error)
			goto error0;
	}

	if (XFS_IS_GQUOTA_ON(mp)) {
		error = xfs_qm_quotacheck_dqadjust(ip, XFS_DQTYPE_GROUP, nblks,
				rtblks);
		if (error)
			goto error0;
	}

	if (XFS_IS_PQUOTA_ON(mp)) {
		error = xfs_qm_quotacheck_dqadjust(ip, XFS_DQTYPE_PROJ, nblks,
				rtblks);
		if (error)
			goto error0;
	}

error0:
	xfs_irele(ip);
	return error;
}

STATIC int
xfs_qm_flush_one(
	struct xfs_dquot	*dqp,
	void			*data)
{
	struct xfs_mount	*mp = dqp->q_mount;
	struct list_head	*buffer_list = data;
	struct xfs_buf		*bp = NULL;
	int			error = 0;

	xfs_dqlock(dqp);
	if (dqp->q_flags & XFS_DQFLAG_FREEING)
		goto out_unlock;
	if (!XFS_DQ_IS_DIRTY(dqp))
		goto out_unlock;

	/*
	 * The only way the dquot is already flush locked by the time quotacheck
	 * gets here is if reclaim flushed it before the dqadjust walk dirtied
	 * it for the final time. Quotacheck collects all dquot bufs in the
	 * local delwri queue before dquots are dirtied, so reclaim can't have
	 * possibly queued it for I/O. The only way out is to push the buffer to
	 * cycle the flush lock.
	 */
	if (!xfs_dqflock_nowait(dqp)) {
		/* buf is pinned in-core by delwri list */
		bp = xfs_buf_incore(mp->m_ddev_targp, dqp->q_blkno,
				mp->m_quotainfo->qi_dqchunklen, 0);
		if (!bp) {
			error = -EINVAL;
			goto out_unlock;
		}
		xfs_buf_unlock(bp);

		xfs_buf_delwri_pushbuf(bp, buffer_list);
		xfs_buf_rele(bp);

		error = -EAGAIN;
		goto out_unlock;
	}

	error = xfs_qm_dqflush(dqp, &bp);
	if (error)
		goto out_unlock;

	xfs_buf_delwri_queue(bp, buffer_list);
	xfs_buf_relse(bp);
out_unlock:
	xfs_dqunlock(dqp);
	return error;
}

/*
 * Walk thru all the filesystem inodes and construct a consistent view
 * of the disk quota world. If the quotacheck fails, disable quotas.
 */
STATIC int
xfs_qm_quotacheck(
	xfs_mount_t	*mp)
{
	int			error, error2;
	uint			flags;
	LIST_HEAD		(buffer_list);
	struct xfs_inode	*uip = mp->m_quotainfo->qi_uquotaip;
	struct xfs_inode	*gip = mp->m_quotainfo->qi_gquotaip;
	struct xfs_inode	*pip = mp->m_quotainfo->qi_pquotaip;

	flags = 0;

	ASSERT(uip || gip || pip);
	ASSERT(XFS_IS_QUOTA_RUNNING(mp));

	xfs_notice(mp, "Quotacheck needed: Please wait.");

	/*
	 * First we go thru all the dquots on disk, USR and GRP/PRJ, and reset
	 * their counters to zero. We need a clean slate.
	 * We don't log our changes till later.
	 */
	if (uip) {
		error = xfs_qm_reset_dqcounts_buf(mp, uip, XFS_DQTYPE_USER,
					 &buffer_list);
		if (error)
			goto error_return;
		flags |= XFS_UQUOTA_CHKD;
	}

	if (gip) {
		error = xfs_qm_reset_dqcounts_buf(mp, gip, XFS_DQTYPE_GROUP,
					 &buffer_list);
		if (error)
			goto error_return;
		flags |= XFS_GQUOTA_CHKD;
	}

	if (pip) {
		error = xfs_qm_reset_dqcounts_buf(mp, pip, XFS_DQTYPE_PROJ,
					 &buffer_list);
		if (error)
			goto error_return;
		flags |= XFS_PQUOTA_CHKD;
	}

	error = xfs_iwalk_threaded(mp, 0, 0, xfs_qm_dqusage_adjust, 0, true,
			NULL);
	if (error) {
		/*
		 * The inode walk may have partially populated the dquot
		 * caches.  We must purge them before disabling quota and
		 * tearing down the quotainfo, or else the dquots will leak.
		 */
		xfs_qm_dqpurge_all(mp, XFS_QMOPT_QUOTALL);
		goto error_return;
	}

	/*
	 * We've made all the changes that we need to make incore.  Flush them
	 * down to disk buffers if everything was updated successfully.
	 */
	if (XFS_IS_UQUOTA_ON(mp)) {
		error = xfs_qm_dquot_walk(mp, XFS_DQTYPE_USER, xfs_qm_flush_one,
					  &buffer_list);
	}
	if (XFS_IS_GQUOTA_ON(mp)) {
		error2 = xfs_qm_dquot_walk(mp, XFS_DQTYPE_GROUP, xfs_qm_flush_one,
					   &buffer_list);
		if (!error)
			error = error2;
	}
	if (XFS_IS_PQUOTA_ON(mp)) {
		error2 = xfs_qm_dquot_walk(mp, XFS_DQTYPE_PROJ, xfs_qm_flush_one,
					   &buffer_list);
		if (!error)
			error = error2;
	}

	error2 = xfs_buf_delwri_submit(&buffer_list);
	if (!error)
		error = error2;

	/*
	 * We can get this error if we couldn't do a dquot allocation inside
	 * xfs_qm_dqusage_adjust (via the inode walk). We don't care about the
	 * dirty dquots that might be cached, we just want to get rid of them
	 * and turn quotaoff. The dquots won't be attached to any of the inodes
	 * at this point (because we intentionally didn't in dqget_noattach).
	 */
	if (error) {
		xfs_qm_dqpurge_all(mp, XFS_QMOPT_QUOTALL);
		goto error_return;
	}

	/*
	 * If one type of quotas is off, then it will lose its
	 * quotachecked status, since we won't be doing accounting for
	 * that type anymore.
	 */
	mp->m_qflags &= ~XFS_ALL_QUOTA_CHKD;
	mp->m_qflags |= flags;

 error_return:
	xfs_buf_delwri_cancel(&buffer_list);

	if (error) {
		xfs_warn(mp,
	"Quotacheck: Unsuccessful (Error %d): Disabling quotas.",
			error);
		/*
		 * We must turn off quotas.
		 */
		ASSERT(mp->m_quotainfo != NULL);
		xfs_qm_destroy_quotainfo(mp);
		if (xfs_mount_reset_sbqflags(mp)) {
			xfs_warn(mp,
				"Quotacheck: Failed to reset quota flags.");
		}
	} else
		xfs_notice(mp, "Quotacheck: Done.");
	return error;
}

/*
 * This is called from xfs_mountfs to start quotas and initialize all
 * necessary data structures like quotainfo.  This is also responsible for
 * running a quotacheck as necessary.  We are guaranteed that the superblock
 * is consistently read in at this point.
 *
 * If we fail here, the mount will continue with quota turned off. We don't
 * need to indicate success or failure at all.
 */
void
xfs_qm_mount_quotas(
	struct xfs_mount	*mp)
{
	int			error = 0;
	uint			sbf;

	/*
	 * Quotas on realtime volumes are not supported, so we disable
	 * quotas immediately.
	 */
	if (mp->m_sb.sb_rextents) {
		xfs_notice(mp, "Cannot turn on quotas for realtime filesystem");
		mp->m_qflags = 0;
		goto write_changes;
	}

	ASSERT(XFS_IS_QUOTA_RUNNING(mp));

	/*
	 * Allocate the quotainfo structure inside the mount struct, and
	 * create the quota inode(s), and change/rev the superblock if
	 * necessary.
	 */
	error = xfs_qm_init_quotainfo(mp);
	if (error) {
		/*
		 * We must turn off quotas.
		 */
		ASSERT(mp->m_quotainfo == NULL);
		mp->m_qflags = 0;
		goto write_changes;
	}
	/*
	 * If any of the quotas are not consistent, do a quotacheck.
	 */
	if (XFS_QM_NEED_QUOTACHECK(mp)) {
		error = xfs_qm_quotacheck(mp);
		if (error) {
			/* Quotacheck failed and disabled quotas. */
			return;
		}
	}
	/*
	 * If one type of quotas is off, then it will lose its
	 * quotachecked status, since we won't be doing accounting for
	 * that type anymore.
	 */
	if (!XFS_IS_UQUOTA_ON(mp))
		mp->m_qflags &= ~XFS_UQUOTA_CHKD;
	if (!XFS_IS_GQUOTA_ON(mp))
		mp->m_qflags &= ~XFS_GQUOTA_CHKD;
	if (!XFS_IS_PQUOTA_ON(mp))
		mp->m_qflags &= ~XFS_PQUOTA_CHKD;

 write_changes:
	/*
	 * We actually don't have to acquire the m_sb_lock at all.
	 * This can only be called from mount, and that's single threaded. XXX
	 */
	spin_lock(&mp->m_sb_lock);
	sbf = mp->m_sb.sb_qflags;
	mp->m_sb.sb_qflags = mp->m_qflags & XFS_MOUNT_QUOTA_ALL;
	spin_unlock(&mp->m_sb_lock);

	if (sbf != (mp->m_qflags & XFS_MOUNT_QUOTA_ALL)) {
		if (xfs_sync_sb(mp, false)) {
			/*
			 * We could only have been turning quotas off.
			 * We aren't in very good shape actually because
			 * the incore structures are convinced that quotas are
			 * off, but the on-disk superblock doesn't know that!
			 */
			ASSERT(!(XFS_IS_QUOTA_RUNNING(mp)));
			xfs_alert(mp, "%s: Superblock update failed!",
				__func__);
		}
	}

	if (error) {
		xfs_warn(mp, "Failed to initialize disk quotas.");
		return;
	}
}

/*
 * This is called after the superblock has been read in and we're ready to
 * iget the quota inodes.
 */
STATIC int
xfs_qm_init_quotainos(
	xfs_mount_t	*mp)
{
	struct xfs_inode	*uip = NULL;
	struct xfs_inode	*gip = NULL;
	struct xfs_inode	*pip = NULL;
	int			error;
	uint			flags = 0;

	ASSERT(mp->m_quotainfo);

	/*
	 * Get the uquota, gquota and pquota inodes.
	 */
	if (xfs_sb_version_hasquota(&mp->m_sb)) {
		if (XFS_IS_UQUOTA_ON(mp) &&
		    mp->m_sb.sb_uquotino != NULLFSINO) {
			ASSERT(mp->m_sb.sb_uquotino > 0);
			error = xfs_iget(mp, NULL, mp->m_sb.sb_uquotino,
					     0, 0, &uip);
			if (error)
				return error;
		}
		if (XFS_IS_GQUOTA_ON(mp) &&
		    mp->m_sb.sb_gquotino != NULLFSINO) {
			ASSERT(mp->m_sb.sb_gquotino > 0);
			error = xfs_iget(mp, NULL, mp->m_sb.sb_gquotino,
					     0, 0, &gip);
			if (error)
				goto error_rele;
		}
		if (XFS_IS_PQUOTA_ON(mp) &&
		    mp->m_sb.sb_pquotino != NULLFSINO) {
			ASSERT(mp->m_sb.sb_pquotino > 0);
			error = xfs_iget(mp, NULL, mp->m_sb.sb_pquotino,
					     0, 0, &pip);
			if (error)
				goto error_rele;
		}
	} else {
		flags |= XFS_QMOPT_SBVERSION;
	}

	/*
	 * Create the three inodes, if they don't exist already. The changes
	 * made above will get added to a transaction and logged in one of
	 * the qino_alloc calls below.  If the device is readonly,
	 * temporarily switch to read-write to do this.
	 */
	if (XFS_IS_UQUOTA_ON(mp) && uip == NULL) {
		error = xfs_qm_qino_alloc(mp, &uip,
					      flags | XFS_QMOPT_UQUOTA);
		if (error)
			goto error_rele;

		flags &= ~XFS_QMOPT_SBVERSION;
	}
	if (XFS_IS_GQUOTA_ON(mp) && gip == NULL) {
		error = xfs_qm_qino_alloc(mp, &gip,
					  flags | XFS_QMOPT_GQUOTA);
		if (error)
			goto error_rele;

		flags &= ~XFS_QMOPT_SBVERSION;
	}
	if (XFS_IS_PQUOTA_ON(mp) && pip == NULL) {
		error = xfs_qm_qino_alloc(mp, &pip,
					  flags | XFS_QMOPT_PQUOTA);
		if (error)
			goto error_rele;
	}

	mp->m_quotainfo->qi_uquotaip = uip;
	mp->m_quotainfo->qi_gquotaip = gip;
	mp->m_quotainfo->qi_pquotaip = pip;

	return 0;

error_rele:
	if (uip)
		xfs_irele(uip);
	if (gip)
		xfs_irele(gip);
	if (pip)
		xfs_irele(pip);
	return error;
}

STATIC void
xfs_qm_destroy_quotainos(
	struct xfs_quotainfo	*qi)
{
	if (qi->qi_uquotaip) {
		xfs_irele(qi->qi_uquotaip);
		qi->qi_uquotaip = NULL; /* paranoia */
	}
	if (qi->qi_gquotaip) {
		xfs_irele(qi->qi_gquotaip);
		qi->qi_gquotaip = NULL;
	}
	if (qi->qi_pquotaip) {
		xfs_irele(qi->qi_pquotaip);
		qi->qi_pquotaip = NULL;
	}
}

STATIC void
xfs_qm_dqfree_one(
	struct xfs_dquot	*dqp)
{
	struct xfs_mount	*mp = dqp->q_mount;
	struct xfs_quotainfo	*qi = mp->m_quotainfo;

	mutex_lock(&qi->qi_tree_lock);
	radix_tree_delete(xfs_dquot_tree(qi, xfs_dquot_type(dqp)), dqp->q_id);

	qi->qi_dquots--;
	mutex_unlock(&qi->qi_tree_lock);

	xfs_qm_dqdestroy(dqp);
}

/* --------------- utility functions for vnodeops ---------------- */


/*
 * Given an inode, a uid, gid and prid, make sure that we have
 * allocated relevant dquot(s) on disk, and that we won't exceed inode
 * quotas by creating this file.
 * This also attaches dquot(s) to the given inode after locking it,
 * and returns the dquots corresponding to the uid and/or gid.
 *
 * in	: inode (unlocked)
 * out	: udquot, gdquot, pdquot with references taken and unlocked
 */
int
xfs_qm_vop_dqalloc(
	struct xfs_inode	*ip,
	kuid_t			uid,
	kgid_t			gid,
	prid_t			prid,
	uint			flags,
	struct xfs_dquot	**O_udqpp,
	struct xfs_dquot	**O_gdqpp,
	struct xfs_dquot	**O_pdqpp)
{
	struct xfs_mount	*mp = ip->i_mount;
	struct inode		*inode = VFS_I(ip);
	struct user_namespace	*user_ns = inode->i_sb->s_user_ns;
	struct xfs_dquot	*uq = NULL;
	struct xfs_dquot	*gq = NULL;
	struct xfs_dquot	*pq = NULL;
	int			error;
	uint			lockflags;

	if (!XFS_IS_QUOTA_RUNNING(mp) || !XFS_IS_QUOTA_ON(mp))
		return 0;

	lockflags = XFS_ILOCK_EXCL;
	xfs_ilock(ip, lockflags);

	if ((flags & XFS_QMOPT_INHERIT) && XFS_INHERIT_GID(ip))
		gid = inode->i_gid;

	/*
	 * Attach the dquot(s) to this inode, doing a dquot allocation
	 * if necessary. The dquot(s) will not be locked.
	 */
	if (XFS_NOT_DQATTACHED(mp, ip)) {
		error = xfs_qm_dqattach_locked(ip, true);
		if (error) {
			xfs_iunlock(ip, lockflags);
			return error;
		}
	}

	if ((flags & XFS_QMOPT_UQUOTA) && XFS_IS_UQUOTA_ON(mp)) {
		ASSERT(O_udqpp);
		if (!uid_eq(inode->i_uid, uid)) {
			/*
			 * What we need is the dquot that has this uid, and
			 * if we send the inode to dqget, the uid of the inode
			 * takes priority over what's sent in the uid argument.
			 * We must unlock the inode here before calling dqget
			 * if we're not sending the inode, because otherwise
			 * we'll deadlock by doing trans_reserve while
			 * holding ilock.
			 */
			xfs_iunlock(ip, lockflags);
			error = xfs_qm_dqget(mp, from_kuid(user_ns, uid),
					XFS_DQTYPE_USER, true, &uq);
			if (error) {
				ASSERT(error != -ENOENT);
				return error;
			}
			/*
			 * Get the ilock in the right order.
			 */
			xfs_dqunlock(uq);
			lockflags = XFS_ILOCK_SHARED;
			xfs_ilock(ip, lockflags);
		} else {
			/*
			 * Take an extra reference, because we'll return
			 * this to the caller.
			 */
			ASSERT(ip->i_udquot);
			uq = xfs_qm_dqhold(ip->i_udquot);
		}
	}
	if ((flags & XFS_QMOPT_GQUOTA) && XFS_IS_GQUOTA_ON(mp)) {
		ASSERT(O_gdqpp);
		if (!gid_eq(inode->i_gid, gid)) {
			xfs_iunlock(ip, lockflags);
			error = xfs_qm_dqget(mp, from_kgid(user_ns, gid),
					XFS_DQTYPE_GROUP, true, &gq);
			if (error) {
				ASSERT(error != -ENOENT);
				goto error_rele;
			}
			xfs_dqunlock(gq);
			lockflags = XFS_ILOCK_SHARED;
			xfs_ilock(ip, lockflags);
		} else {
			ASSERT(ip->i_gdquot);
			gq = xfs_qm_dqhold(ip->i_gdquot);
		}
	}
	if ((flags & XFS_QMOPT_PQUOTA) && XFS_IS_PQUOTA_ON(mp)) {
		ASSERT(O_pdqpp);
		if (ip->i_d.di_projid != prid) {
			xfs_iunlock(ip, lockflags);
			error = xfs_qm_dqget(mp, prid,
					XFS_DQTYPE_PROJ, true, &pq);
			if (error) {
				ASSERT(error != -ENOENT);
				goto error_rele;
			}
			xfs_dqunlock(pq);
			lockflags = XFS_ILOCK_SHARED;
			xfs_ilock(ip, lockflags);
		} else {
			ASSERT(ip->i_pdquot);
			pq = xfs_qm_dqhold(ip->i_pdquot);
		}
	}
	trace_xfs_dquot_dqalloc(ip);

	xfs_iunlock(ip, lockflags);
	if (O_udqpp)
		*O_udqpp = uq;
	else
		xfs_qm_dqrele(uq);
	if (O_gdqpp)
		*O_gdqpp = gq;
	else
		xfs_qm_dqrele(gq);
	if (O_pdqpp)
		*O_pdqpp = pq;
	else
		xfs_qm_dqrele(pq);
	return 0;

error_rele:
	xfs_qm_dqrele(gq);
	xfs_qm_dqrele(uq);
	return error;
}
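
/*
 * Usage sketch (illustrative; modeled on a create-style caller, with the
 * variable names hypothetical): allocate dquot references up front,
 * reserve against them inside a transaction, then drop the references:
 *
 *	struct xfs_dquot *udqp = NULL, *gdqp = NULL, *pdqp = NULL;
 *
 *	error = xfs_qm_vop_dqalloc(dp, fsuid, fsgid, prid,
 *			XFS_QMOPT_QUOTALL | XFS_QMOPT_INHERIT,
 *			&udqp, &gdqp, &pdqp);
 *	...reserve quota, create the inode, commit...
 *	xfs_qm_dqrele(udqp);
 *	xfs_qm_dqrele(gdqp);
 *	xfs_qm_dqrele(pdqp);
 */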

/*
 * Actually transfer ownership, and do dquot modifications.
 * These were already reserved.
 */
struct xfs_dquot *
xfs_qm_vop_chown(
	struct xfs_trans	*tp,
	struct xfs_inode	*ip,
	struct xfs_dquot	**IO_olddq,
	struct xfs_dquot	*newdq)
{
	struct xfs_dquot	*prevdq;
	uint		bfield = XFS_IS_REALTIME_INODE(ip) ?
				 XFS_TRANS_DQ_RTBCOUNT : XFS_TRANS_DQ_BCOUNT;

	ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL));
	ASSERT(XFS_IS_QUOTA_RUNNING(ip->i_mount));

	/* old dquot */
	prevdq = *IO_olddq;
	ASSERT(prevdq);
	ASSERT(prevdq != newdq);

	xfs_trans_mod_dquot(tp, prevdq, bfield, -(ip->i_d.di_nblocks));
	xfs_trans_mod_dquot(tp, prevdq, XFS_TRANS_DQ_ICOUNT, -1);

	/* the sparkling new dquot */
	xfs_trans_mod_dquot(tp, newdq, bfield, ip->i_d.di_nblocks);
	xfs_trans_mod_dquot(tp, newdq, XFS_TRANS_DQ_ICOUNT, 1);

	/*
	 * Back when we made quota reservations for the chown, we reserved the
	 * ondisk blocks + delalloc blocks with the new dquot.  Now that we've
	 * switched the dquots, decrease the new dquot's block reservation
	 * (having already bumped up the real counter) so that we don't have
	 * any reservation to give back when we commit.
	 */
	xfs_trans_mod_dquot(tp, newdq, XFS_TRANS_DQ_RES_BLKS,
			-ip->i_delayed_blks);

	/*
	 * Give the incore reservation for delalloc blocks back to the old
	 * dquot.  We don't normally handle delalloc quota reservations
	 * transactionally, so just lock the dquot and subtract from the
	 * reservation.  Dirty the transaction because it's too late to turn
	 * back now.
	 */
	tp->t_flags |= XFS_TRANS_DIRTY;
	xfs_dqlock(prevdq);
	ASSERT(prevdq->q_blk.reserved >= ip->i_delayed_blks);
	prevdq->q_blk.reserved -= ip->i_delayed_blks;
	xfs_dqunlock(prevdq);

	/*
	 * Take an extra reference, because the inode is going to keep
	 * this dquot pointer even after the trans_commit.
	 */
	*IO_olddq = xfs_qm_dqhold(newdq);

	return prevdq;
}

/*
 * Quota reservations for setattr(AT_UID|AT_GID|AT_PROJID).
 */
int
xfs_qm_vop_chown_reserve(
	struct xfs_trans	*tp,
	struct xfs_inode	*ip,
	struct xfs_dquot	*udqp,
	struct xfs_dquot	*gdqp,
	struct xfs_dquot	*pdqp,
	uint			flags)
{
	struct xfs_mount	*mp = ip->i_mount;
	unsigned int		blkflags;
	struct xfs_dquot	*udq_delblks = NULL;
	struct xfs_dquot	*gdq_delblks = NULL;
	struct xfs_dquot	*pdq_delblks = NULL;

	ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL|XFS_ILOCK_SHARED));
	ASSERT(XFS_IS_QUOTA_RUNNING(mp));

	blkflags = XFS_IS_REALTIME_INODE(ip) ?
			XFS_QMOPT_RES_RTBLKS : XFS_QMOPT_RES_REGBLKS;

	if (XFS_IS_UQUOTA_ON(mp) && udqp &&
	    i_uid_read(VFS_I(ip)) != udqp->q_id)
		udq_delblks = udqp;

	if (XFS_IS_GQUOTA_ON(ip->i_mount) && gdqp &&
	    i_gid_read(VFS_I(ip)) != gdqp->q_id)
		gdq_delblks = gdqp;

	if (XFS_IS_PQUOTA_ON(ip->i_mount) && pdqp &&
	    ip->i_d.di_projid != pdqp->q_id)
		pdq_delblks = pdqp;

	/*
	 * Reserve enough quota to handle blocks on disk and reserved for a
	 * delayed allocation.  We'll actually transfer the delalloc
	 * reservation between dquots at chown time, even though that part is
	 * only semi-transactional.
	 */
	return xfs_trans_reserve_quota_bydquots(tp, ip->i_mount, udq_delblks,
			gdq_delblks, pdq_delblks,
			ip->i_d.di_nblocks + ip->i_delayed_blks,
			1, blkflags | flags);
}
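
/*
 * Flow sketch (illustrative; the real caller lives in the setattr path and
 * the details shown here are hypothetical): an ownership change strings
 * the helpers in this file together roughly as follows:
 *
 *	xfs_qm_vop_dqalloc(ip, new_uid, new_gid, prid, XFS_QMOPT_QUOTALL,
 *			&udqp, &gdqp, &pdqp);
 *	...allocate and reserve a transaction...
 *	xfs_qm_vop_chown_reserve(tp, ip, udqp, gdqp, pdqp, 0);
 *	olddq = xfs_qm_vop_chown(tp, ip, &ip->i_udquot, udqp);
 *	...commit, then xfs_qm_dqrele() the old and new dquots...
 */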

int
xfs_qm_vop_rename_dqattach(
	struct xfs_inode	**i_tab)
{
	struct xfs_mount	*mp = i_tab[0]->i_mount;
	int			i;

	if (!XFS_IS_QUOTA_RUNNING(mp) || !XFS_IS_QUOTA_ON(mp))
		return 0;

	for (i = 0; (i < 4 && i_tab[i]); i++) {
		struct xfs_inode	*ip = i_tab[i];
		int			error;

		/*
		 * Watch out for duplicate entries in the table.
		 */
		if (i == 0 || ip != i_tab[i-1]) {
			if (XFS_NOT_DQATTACHED(mp, ip)) {
				error = xfs_qm_dqattach(ip);
				if (error)
					return error;
			}
		}
	}
	return 0;
}
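
/*
 * Context note (the caller is outside this file): the rename code passes a
 * table of up to four inodes (the two parent directories plus the source
 * and, if it exists, the target inode), sorted so that duplicates end up
 * adjacent - which is why the loop above only compares i_tab[i] against
 * i_tab[i-1].
 */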

void
xfs_qm_vop_create_dqattach(
	struct xfs_trans	*tp,
	struct xfs_inode	*ip,
	struct xfs_dquot	*udqp,
	struct xfs_dquot	*gdqp,
	struct xfs_dquot	*pdqp)
{
	struct xfs_mount	*mp = tp->t_mountp;

	if (!XFS_IS_QUOTA_RUNNING(mp) || !XFS_IS_QUOTA_ON(mp))
		return;

	ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL));

	if (udqp && XFS_IS_UQUOTA_ON(mp)) {
		ASSERT(ip->i_udquot == NULL);
		ASSERT(i_uid_read(VFS_I(ip)) == udqp->q_id);

		ip->i_udquot = xfs_qm_dqhold(udqp);
		xfs_trans_mod_dquot(tp, udqp, XFS_TRANS_DQ_ICOUNT, 1);
	}
	if (gdqp && XFS_IS_GQUOTA_ON(mp)) {
		ASSERT(ip->i_gdquot == NULL);
		ASSERT(i_gid_read(VFS_I(ip)) == gdqp->q_id);

		ip->i_gdquot = xfs_qm_dqhold(gdqp);
		xfs_trans_mod_dquot(tp, gdqp, XFS_TRANS_DQ_ICOUNT, 1);
	}
	if (pdqp && XFS_IS_PQUOTA_ON(mp)) {
		ASSERT(ip->i_pdquot == NULL);
		ASSERT(ip->i_d.di_projid == pdqp->q_id);

		ip->i_pdquot = xfs_qm_dqhold(pdqp);
		xfs_trans_mod_dquot(tp, pdqp, XFS_TRANS_DQ_ICOUNT, 1);
	}
}