xref: /kernel/linux/linux-5.10/fs/ntfs/lcnalloc.c (revision 8c2ecf20)
1// SPDX-License-Identifier: GPL-2.0-or-later
2/*
3 * lcnalloc.c - Cluster (de)allocation code.  Part of the Linux-NTFS project.
4 *
5 * Copyright (c) 2004-2005 Anton Altaparmakov
6 */
7
8#ifdef NTFS_RW
9
10#include <linux/pagemap.h>
11
12#include "lcnalloc.h"
13#include "debug.h"
14#include "bitmap.h"
15#include "inode.h"
16#include "volume.h"
17#include "attrib.h"
18#include "malloc.h"
19#include "aops.h"
20#include "ntfs.h"
21
22/**
23 * ntfs_cluster_free_from_rl_nolock - free clusters from runlist
24 * @vol:	mounted ntfs volume on which to free the clusters
25 * @rl:		runlist describing the clusters to free
26 *
27 * Free all the clusters described by the runlist @rl on the volume @vol.  In
28 * the case of an error being returned, at least some of the clusters were not
29 * freed.
30 *
31 * Return 0 on success and -errno on error.
32 *
33 * Locking: - The volume lcn bitmap must be locked for writing on entry and is
34 *	      left locked on return.
35 */
36int ntfs_cluster_free_from_rl_nolock(ntfs_volume *vol,
37		const runlist_element *rl)
38{
39	struct inode *lcnbmp_vi = vol->lcnbmp_ino;
40	int ret = 0;
41
42	ntfs_debug("Entering.");
43	if (!rl)
44		return 0;
45	for (; rl->length; rl++) {
46		int err;
47
48		if (rl->lcn < 0)
49			continue;
50		err = ntfs_bitmap_clear_run(lcnbmp_vi, rl->lcn, rl->length);
51		if (unlikely(err && (!ret || ret == -ENOMEM) && ret != err))
52			ret = err;
53	}
54	ntfs_debug("Done.");
55	return ret;
56}
57
58/**
59 * ntfs_cluster_alloc - allocate clusters on an ntfs volume
60 * @vol:	mounted ntfs volume on which to allocate the clusters
61 * @start_vcn:	vcn to use for the first allocated cluster
62 * @count:	number of clusters to allocate
63 * @start_lcn:	starting lcn at which to allocate the clusters (or -1 if none)
64 * @zone:	zone from which to allocate the clusters
65 * @is_extension:	if 'true', this is an attribute extension
66 *
67 * Allocate @count clusters preferably starting at cluster @start_lcn or at the
68 * current allocator position if @start_lcn is -1, on the mounted ntfs volume
69 * @vol. @zone is either DATA_ZONE for allocation of normal clusters or
70 * MFT_ZONE for allocation of clusters for the master file table, i.e. the
71 * $MFT/$DATA attribute.
72 *
73 * @start_vcn specifies the vcn of the first allocated cluster.  This makes
74 * merging the resulting runlist with the old runlist easier.
75 *
76 * If @is_extension is 'true', the caller is allocating clusters to extend an
77 * attribute and if it is 'false', the caller is allocating clusters to fill a
78 * hole in an attribute.  Practically the difference is that if @is_extension
79 * is 'true' the returned runlist will be terminated with LCN_ENOENT and if
80 * @is_extension is 'false' the runlist will be terminated with
81 * LCN_RL_NOT_MAPPED.
82 *
83 * You need to check the return value with IS_ERR().  If this is false, the
84 * function was successful and the return value is a runlist describing the
85 * allocated cluster(s).  If IS_ERR() is true, the function failed and
86 * PTR_ERR() gives you the error code.
87 *
88 * Notes on the allocation algorithm
89 * =================================
90 *
91 * There are two data zones.  First is the area between the end of the mft zone
92 * and the end of the volume, and second is the area between the start of the
93 * volume and the start of the mft zone.  On unmodified/standard NTFS 1.x
94 * volumes, the second data zone does not exist due to the mft zone being
95 * expanded to cover the start of the volume in order to reserve space for the
96 * mft bitmap attribute.
97 *
98 * This is not the prettiest function but the complexity stems from the need of
99 * implementing the mft vs data zoned approach and from the fact that we have
100 * access to the lcn bitmap in portions of up to 8192 bytes at a time, so we
101 * need to cope with crossing over boundaries of two buffers.  Further, the
102 * fact that the allocator allows for caller supplied hints as to the location
103 * of where allocation should begin and the fact that the allocator keeps track
104 * of where in the data zones the next natural allocation should occur,
105 * contribute to the complexity of the function.  But it should all be
106 * worthwhile, because this allocator should: 1) be a full implementation of
107 * the MFT zone approach used by Windows NT, 2) cause reduction in
108 * fragmentation, and 3) be speedy in allocations (the code is not optimized
109 * for speed, but the algorithm is, so further speed improvements are probably
110 * possible).
111 *
112 * FIXME: We should be monitoring cluster allocation and increment the MFT zone
113 * size dynamically but this is something for the future.  We will just cause
114 * heavier fragmentation by not doing it and I am not even sure Windows would
115 * grow the MFT zone dynamically, so it might even be correct not to do this.
116 * The overhead in doing dynamic MFT zone expansion would be very large and
117 * unlikely worth the effort. (AIA)
118 *
119 * TODO: I have added in double the required zone position pointer wrap around
120 * logic which can be optimized to having only one of the two logic sets.
121 * However, having the double logic will work fine, but if we have only one of
122 * the sets and we get it wrong somewhere, then we get into trouble, so
123 * removing the duplicate logic requires _very_ careful consideration of _all_
124 * possible code paths.  So at least for now, I am leaving the double logic -
125 * better safe than sorry... (AIA)
126 *
127 * Locking: - The volume lcn bitmap must be unlocked on entry and is unlocked
128 *	      on return.
129 *	    - This function takes the volume lcn bitmap lock for writing and
130 *	      modifies the bitmap contents.
131 */
132runlist_element *ntfs_cluster_alloc(ntfs_volume *vol, const VCN start_vcn,
133		const s64 count, const LCN start_lcn,
134		const NTFS_CLUSTER_ALLOCATION_ZONES zone,
135		const bool is_extension)
136{
137	LCN zone_start, zone_end, bmp_pos, bmp_initial_pos, last_read_pos, lcn;
138	LCN prev_lcn = 0, prev_run_len = 0, mft_zone_size;
139	s64 clusters;
140	loff_t i_size;
141	struct inode *lcnbmp_vi;
142	runlist_element *rl = NULL;
143	struct address_space *mapping;
144	struct page *page = NULL;
145	u8 *buf, *byte;
146	int err = 0, rlpos, rlsize, buf_size;
147	u8 pass, done_zones, search_zone, need_writeback = 0, bit;
148
149	ntfs_debug("Entering for start_vcn 0x%llx, count 0x%llx, start_lcn "
150			"0x%llx, zone %s_ZONE.", (unsigned long long)start_vcn,
151			(unsigned long long)count,
152			(unsigned long long)start_lcn,
153			zone == MFT_ZONE ? "MFT" : "DATA");
154	BUG_ON(!vol);
155	lcnbmp_vi = vol->lcnbmp_ino;
156	BUG_ON(!lcnbmp_vi);
157	BUG_ON(start_vcn < 0);
158	BUG_ON(count < 0);
159	BUG_ON(start_lcn < -1);
160	BUG_ON(zone < FIRST_ZONE);
161	BUG_ON(zone > LAST_ZONE);
162
163	/* Return NULL if @count is zero. */
164	if (!count)
165		return NULL;
166	/* Take the lcnbmp lock for writing. */
167	down_write(&vol->lcnbmp_lock);
168	/*
169	 * If no specific @start_lcn was requested, use the current data zone
170	 * position, otherwise use the requested @start_lcn but make sure it
171	 * lies outside the mft zone.  Also set done_zones to 0 (no zones done)
172	 * and pass depending on whether we are starting inside a zone (1) or
173	 * at the beginning of a zone (2).  If requesting from the MFT_ZONE,
174	 * we either start at the current position within the mft zone or at
175	 * the specified position.  If the latter is out of bounds then we start
176	 * at the beginning of the MFT_ZONE.
177	 */
178	done_zones = 0;
179	pass = 1;
180	/*
181	 * zone_start and zone_end are the current search range.  search_zone
182	 * is 1 for mft zone, 2 for data zone 1 (end of mft zone till end of
183	 * volume) and 4 for data zone 2 (start of volume till start of mft
184	 * zone).
185	 */
186	zone_start = start_lcn;
187	if (zone_start < 0) {
188		if (zone == DATA_ZONE)
189			zone_start = vol->data1_zone_pos;
190		else
191			zone_start = vol->mft_zone_pos;
192		if (!zone_start) {
193			/*
194			 * Zone starts at beginning of volume which means a
195			 * single pass is sufficient.
196			 */
197			pass = 2;
198		}
199	} else if (zone == DATA_ZONE && zone_start >= vol->mft_zone_start &&
200			zone_start < vol->mft_zone_end) {
201		zone_start = vol->mft_zone_end;
202		/*
203		 * Starting at beginning of data1_zone which means a single
204		 * pass in this zone is sufficient.
205		 */
206		pass = 2;
207	} else if (zone == MFT_ZONE && (zone_start < vol->mft_zone_start ||
208			zone_start >= vol->mft_zone_end)) {
209		zone_start = vol->mft_lcn;
210		if (!vol->mft_zone_end)
211			zone_start = 0;
212		/*
213		 * Starting at beginning of volume which means a single pass
214		 * is sufficient.
215		 */
216		pass = 2;
217	}
218	if (zone == MFT_ZONE) {
219		zone_end = vol->mft_zone_end;
220		search_zone = 1;
221	} else /* if (zone == DATA_ZONE) */ {
222		/* Skip searching the mft zone. */
223		done_zones |= 1;
224		if (zone_start >= vol->mft_zone_end) {
225			zone_end = vol->nr_clusters;
226			search_zone = 2;
227		} else {
228			zone_end = vol->mft_zone_start;
229			search_zone = 4;
230		}
231	}
232	/*
233	 * bmp_pos is the current bit position inside the bitmap.  We use
234	 * bmp_initial_pos to determine whether or not to do a zone switch.
235	 */
236	bmp_pos = bmp_initial_pos = zone_start;
237
238	/* Loop until all clusters are allocated, i.e. clusters == 0. */
239	clusters = count;
240	rlpos = rlsize = 0;
241	mapping = lcnbmp_vi->i_mapping;
242	i_size = i_size_read(lcnbmp_vi);
243	while (1) {
244		ntfs_debug("Start of outer while loop: done_zones 0x%x, "
245				"search_zone %i, pass %i, zone_start 0x%llx, "
246				"zone_end 0x%llx, bmp_initial_pos 0x%llx, "
247				"bmp_pos 0x%llx, rlpos %i, rlsize %i.",
248				done_zones, search_zone, pass,
249				(unsigned long long)zone_start,
250				(unsigned long long)zone_end,
251				(unsigned long long)bmp_initial_pos,
252				(unsigned long long)bmp_pos, rlpos, rlsize);
253		/* Loop until we run out of free clusters. */
254		last_read_pos = bmp_pos >> 3;
255		ntfs_debug("last_read_pos 0x%llx.",
256				(unsigned long long)last_read_pos);
257		if (last_read_pos > i_size) {
258			ntfs_debug("End of attribute reached.  "
259					"Skipping to zone_pass_done.");
260			goto zone_pass_done;
261		}
262		if (likely(page)) {
263			if (need_writeback) {
264				ntfs_debug("Marking page dirty.");
265				flush_dcache_page(page);
266				set_page_dirty(page);
267				need_writeback = 0;
268			}
269			ntfs_unmap_page(page);
270		}
271		page = ntfs_map_page(mapping, last_read_pos >>
272				PAGE_SHIFT);
273		if (IS_ERR(page)) {
274			err = PTR_ERR(page);
275			ntfs_error(vol->sb, "Failed to map page.");
276			goto out;
277		}
278		buf_size = last_read_pos & ~PAGE_MASK;
279		buf = page_address(page) + buf_size;
280		buf_size = PAGE_SIZE - buf_size;
281		if (unlikely(last_read_pos + buf_size > i_size))
282			buf_size = i_size - last_read_pos;
283		buf_size <<= 3;
284		lcn = bmp_pos & 7;
285		bmp_pos &= ~(LCN)7;
286		ntfs_debug("Before inner while loop: buf_size %i, lcn 0x%llx, "
287				"bmp_pos 0x%llx, need_writeback %i.", buf_size,
288				(unsigned long long)lcn,
289				(unsigned long long)bmp_pos, need_writeback);
290		while (lcn < buf_size && lcn + bmp_pos < zone_end) {
291			byte = buf + (lcn >> 3);
292			ntfs_debug("In inner while loop: buf_size %i, "
293					"lcn 0x%llx, bmp_pos 0x%llx, "
294					"need_writeback %i, byte ofs 0x%x, "
295					"*byte 0x%x.", buf_size,
296					(unsigned long long)lcn,
297					(unsigned long long)bmp_pos,
298					need_writeback,
299					(unsigned int)(lcn >> 3),
300					(unsigned int)*byte);
301			/* Skip full bytes. */
302			if (*byte == 0xff) {
303				lcn = (lcn + 8) & ~(LCN)7;
304				ntfs_debug("Continuing while loop 1.");
305				continue;
306			}
307			bit = 1 << (lcn & 7);
308			ntfs_debug("bit 0x%x.", bit);
309			/* If the bit is already set, go onto the next one. */
310			if (*byte & bit) {
311				lcn++;
312				ntfs_debug("Continuing while loop 2.");
313				continue;
314			}
315			/*
316			 * Allocate more memory if needed, including space for
317			 * the terminator element.
318			 * ntfs_malloc_nofs() operates on whole pages only.
319			 */
320			if ((rlpos + 2) * sizeof(*rl) > rlsize) {
321				runlist_element *rl2;
322
323				ntfs_debug("Reallocating memory.");
324				if (!rl)
325					ntfs_debug("First free bit is at LCN "
326							"0x%llx.",
327							(unsigned long long)
328							(lcn + bmp_pos));
329				rl2 = ntfs_malloc_nofs(rlsize + (int)PAGE_SIZE);
330				if (unlikely(!rl2)) {
331					err = -ENOMEM;
332					ntfs_error(vol->sb, "Failed to "
333							"allocate memory.");
334					goto out;
335				}
336				memcpy(rl2, rl, rlsize);
337				ntfs_free(rl);
338				rl = rl2;
339				rlsize += PAGE_SIZE;
340				ntfs_debug("Reallocated memory, rlsize 0x%x.",
341						rlsize);
342			}
343			/* Allocate the bitmap bit. */
344			*byte |= bit;
345			/* We need to write this bitmap page to disk. */
346			need_writeback = 1;
347			ntfs_debug("*byte 0x%x, need_writeback is set.",
348					(unsigned int)*byte);
349			/*
350			 * Coalesce with previous run if adjacent LCNs.
351			 * Otherwise, append a new run.
352			 */
353			ntfs_debug("Adding run (lcn 0x%llx, len 0x%llx), "
354					"prev_lcn 0x%llx, lcn 0x%llx, "
355					"bmp_pos 0x%llx, prev_run_len 0x%llx, "
356					"rlpos %i.",
357					(unsigned long long)(lcn + bmp_pos),
358					1ULL, (unsigned long long)prev_lcn,
359					(unsigned long long)lcn,
360					(unsigned long long)bmp_pos,
361					(unsigned long long)prev_run_len,
362					rlpos);
363			if (prev_lcn == lcn + bmp_pos - prev_run_len && rlpos) {
364				ntfs_debug("Coalescing to run (lcn 0x%llx, "
365						"len 0x%llx).",
366						(unsigned long long)
367						rl[rlpos - 1].lcn,
368						(unsigned long long)
369						rl[rlpos - 1].length);
370				rl[rlpos - 1].length = ++prev_run_len;
371				ntfs_debug("Run now (lcn 0x%llx, len 0x%llx), "
372						"prev_run_len 0x%llx.",
373						(unsigned long long)
374						rl[rlpos - 1].lcn,
375						(unsigned long long)
376						rl[rlpos - 1].length,
377						(unsigned long long)
378						prev_run_len);
379			} else {
380				if (likely(rlpos)) {
381					ntfs_debug("Adding new run, (previous "
382							"run lcn 0x%llx, "
383							"len 0x%llx).",
384							(unsigned long long)
385							rl[rlpos - 1].lcn,
386							(unsigned long long)
387							rl[rlpos - 1].length);
388					rl[rlpos].vcn = rl[rlpos - 1].vcn +
389							prev_run_len;
390				} else {
391					ntfs_debug("Adding new run, is first "
392							"run.");
393					rl[rlpos].vcn = start_vcn;
394				}
395				rl[rlpos].lcn = prev_lcn = lcn + bmp_pos;
396				rl[rlpos].length = prev_run_len = 1;
397				rlpos++;
398			}
399			/* Done? */
400			if (!--clusters) {
401				LCN tc;
402				/*
403				 * Update the current zone position.  Positions
404				 * of already scanned zones have been updated
405				 * during the respective zone switches.
406				 */
407				tc = lcn + bmp_pos + 1;
408				ntfs_debug("Done. Updating current zone "
409						"position, tc 0x%llx, "
410						"search_zone %i.",
411						(unsigned long long)tc,
412						search_zone);
413				switch (search_zone) {
414				case 1:
415					ntfs_debug("Before checks, "
416							"vol->mft_zone_pos "
417							"0x%llx.",
418							(unsigned long long)
419							vol->mft_zone_pos);
420					if (tc >= vol->mft_zone_end) {
421						vol->mft_zone_pos =
422								vol->mft_lcn;
423						if (!vol->mft_zone_end)
424							vol->mft_zone_pos = 0;
425					} else if ((bmp_initial_pos >=
426							vol->mft_zone_pos ||
427							tc > vol->mft_zone_pos)
428							&& tc >= vol->mft_lcn)
429						vol->mft_zone_pos = tc;
430					ntfs_debug("After checks, "
431							"vol->mft_zone_pos "
432							"0x%llx.",
433							(unsigned long long)
434							vol->mft_zone_pos);
435					break;
436				case 2:
437					ntfs_debug("Before checks, "
438							"vol->data1_zone_pos "
439							"0x%llx.",
440							(unsigned long long)
441							vol->data1_zone_pos);
442					if (tc >= vol->nr_clusters)
443						vol->data1_zone_pos =
444							     vol->mft_zone_end;
445					else if ((bmp_initial_pos >=
446						    vol->data1_zone_pos ||
447						    tc > vol->data1_zone_pos)
448						    && tc >= vol->mft_zone_end)
449						vol->data1_zone_pos = tc;
450					ntfs_debug("After checks, "
451							"vol->data1_zone_pos "
452							"0x%llx.",
453							(unsigned long long)
454							vol->data1_zone_pos);
455					break;
456				case 4:
457					ntfs_debug("Before checks, "
458							"vol->data2_zone_pos "
459							"0x%llx.",
460							(unsigned long long)
461							vol->data2_zone_pos);
462					if (tc >= vol->mft_zone_start)
463						vol->data2_zone_pos = 0;
464					else if (bmp_initial_pos >=
465						      vol->data2_zone_pos ||
466						      tc > vol->data2_zone_pos)
467						vol->data2_zone_pos = tc;
468					ntfs_debug("After checks, "
469							"vol->data2_zone_pos "
470							"0x%llx.",
471							(unsigned long long)
472							vol->data2_zone_pos);
473					break;
474				default:
475					BUG();
476				}
477				ntfs_debug("Finished.  Going to out.");
478				goto out;
479			}
480			lcn++;
481		}
482		bmp_pos += buf_size;
483		ntfs_debug("After inner while loop: buf_size 0x%x, lcn "
484				"0x%llx, bmp_pos 0x%llx, need_writeback %i.",
485				buf_size, (unsigned long long)lcn,
486				(unsigned long long)bmp_pos, need_writeback);
487		if (bmp_pos < zone_end) {
488			ntfs_debug("Continuing outer while loop, "
489					"bmp_pos 0x%llx, zone_end 0x%llx.",
490					(unsigned long long)bmp_pos,
491					(unsigned long long)zone_end);
492			continue;
493		}
494zone_pass_done:	/* Finished with the current zone pass. */
495		ntfs_debug("At zone_pass_done, pass %i.", pass);
496		if (pass == 1) {
497			/*
498			 * Now do pass 2, scanning the first part of the zone
499			 * we omitted in pass 1.
500			 */
501			pass = 2;
502			zone_end = zone_start;
503			switch (search_zone) {
504			case 1: /* mft_zone */
505				zone_start = vol->mft_zone_start;
506				break;
507			case 2: /* data1_zone */
508				zone_start = vol->mft_zone_end;
509				break;
510			case 4: /* data2_zone */
511				zone_start = 0;
512				break;
513			default:
514				BUG();
515			}
516			/* Sanity check. */
517			if (zone_end < zone_start)
518				zone_end = zone_start;
519			bmp_pos = zone_start;
520			ntfs_debug("Continuing outer while loop, pass 2, "
521					"zone_start 0x%llx, zone_end 0x%llx, "
522					"bmp_pos 0x%llx.",
523					(unsigned long long)zone_start,
524					(unsigned long long)zone_end,
525					(unsigned long long)bmp_pos);
526			continue;
527		} /* pass == 2 */
528done_zones_check:
529		ntfs_debug("At done_zones_check, search_zone %i, done_zones "
530				"before 0x%x, done_zones after 0x%x.",
531				search_zone, done_zones,
532				done_zones | search_zone);
533		done_zones |= search_zone;
534		if (done_zones < 7) {
535			ntfs_debug("Switching zone.");
536			/* Now switch to the next zone we haven't done yet. */
537			pass = 1;
538			switch (search_zone) {
539			case 1:
540				ntfs_debug("Switching from mft zone to data1 "
541						"zone.");
542				/* Update mft zone position. */
543				if (rlpos) {
544					LCN tc;
545
546					ntfs_debug("Before checks, "
547							"vol->mft_zone_pos "
548							"0x%llx.",
549							(unsigned long long)
550							vol->mft_zone_pos);
551					tc = rl[rlpos - 1].lcn +
552							rl[rlpos - 1].length;
553					if (tc >= vol->mft_zone_end) {
554						vol->mft_zone_pos =
555								vol->mft_lcn;
556						if (!vol->mft_zone_end)
557							vol->mft_zone_pos = 0;
558					} else if ((bmp_initial_pos >=
559							vol->mft_zone_pos ||
560							tc > vol->mft_zone_pos)
561							&& tc >= vol->mft_lcn)
562						vol->mft_zone_pos = tc;
563					ntfs_debug("After checks, "
564							"vol->mft_zone_pos "
565							"0x%llx.",
566							(unsigned long long)
567							vol->mft_zone_pos);
568				}
569				/* Switch from mft zone to data1 zone. */
570switch_to_data1_zone:		search_zone = 2;
571				zone_start = bmp_initial_pos =
572						vol->data1_zone_pos;
573				zone_end = vol->nr_clusters;
574				if (zone_start == vol->mft_zone_end)
575					pass = 2;
576				if (zone_start >= zone_end) {
577					vol->data1_zone_pos = zone_start =
578							vol->mft_zone_end;
579					pass = 2;
580				}
581				break;
582			case 2:
583				ntfs_debug("Switching from data1 zone to "
584						"data2 zone.");
585				/* Update data1 zone position. */
586				if (rlpos) {
587					LCN tc;
588
589					ntfs_debug("Before checks, "
590							"vol->data1_zone_pos "
591							"0x%llx.",
592							(unsigned long long)
593							vol->data1_zone_pos);
594					tc = rl[rlpos - 1].lcn +
595							rl[rlpos - 1].length;
596					if (tc >= vol->nr_clusters)
597						vol->data1_zone_pos =
598							     vol->mft_zone_end;
599					else if ((bmp_initial_pos >=
600						    vol->data1_zone_pos ||
601						    tc > vol->data1_zone_pos)
602						    && tc >= vol->mft_zone_end)
603						vol->data1_zone_pos = tc;
604					ntfs_debug("After checks, "
605							"vol->data1_zone_pos "
606							"0x%llx.",
607							(unsigned long long)
608							vol->data1_zone_pos);
609				}
610				/* Switch from data1 zone to data2 zone. */
611				search_zone = 4;
612				zone_start = bmp_initial_pos =
613						vol->data2_zone_pos;
614				zone_end = vol->mft_zone_start;
615				if (!zone_start)
616					pass = 2;
617				if (zone_start >= zone_end) {
618					vol->data2_zone_pos = zone_start =
619							bmp_initial_pos = 0;
620					pass = 2;
621				}
622				break;
623			case 4:
624				ntfs_debug("Switching from data2 zone to "
625						"data1 zone.");
626				/* Update data2 zone position. */
627				if (rlpos) {
628					LCN tc;
629
630					ntfs_debug("Before checks, "
631							"vol->data2_zone_pos "
632							"0x%llx.",
633							(unsigned long long)
634							vol->data2_zone_pos);
635					tc = rl[rlpos - 1].lcn +
636							rl[rlpos - 1].length;
637					if (tc >= vol->mft_zone_start)
638						vol->data2_zone_pos = 0;
639					else if (bmp_initial_pos >=
640						      vol->data2_zone_pos ||
641						      tc > vol->data2_zone_pos)
642						vol->data2_zone_pos = tc;
643					ntfs_debug("After checks, "
644							"vol->data2_zone_pos "
645							"0x%llx.",
646							(unsigned long long)
647							vol->data2_zone_pos);
648				}
649				/* Switch from data2 zone to data1 zone. */
650				goto switch_to_data1_zone;
651			default:
652				BUG();
653			}
654			ntfs_debug("After zone switch, search_zone %i, "
655					"pass %i, bmp_initial_pos 0x%llx, "
656					"zone_start 0x%llx, zone_end 0x%llx.",
657					search_zone, pass,
658					(unsigned long long)bmp_initial_pos,
659					(unsigned long long)zone_start,
660					(unsigned long long)zone_end);
661			bmp_pos = zone_start;
662			if (zone_start == zone_end) {
663				ntfs_debug("Empty zone, going to "
664						"done_zones_check.");
665				/* Empty zone. Don't bother searching it. */
666				goto done_zones_check;
667			}
668			ntfs_debug("Continuing outer while loop.");
669			continue;
670		} /* done_zones == 7 */
671		ntfs_debug("All zones are finished.");
672		/*
673		 * All zones are finished!  If DATA_ZONE, shrink mft zone.  If
674		 * MFT_ZONE, we have really run out of space.
675		 */
676		mft_zone_size = vol->mft_zone_end - vol->mft_zone_start;
677		ntfs_debug("vol->mft_zone_start 0x%llx, vol->mft_zone_end "
678				"0x%llx, mft_zone_size 0x%llx.",
679				(unsigned long long)vol->mft_zone_start,
680				(unsigned long long)vol->mft_zone_end,
681				(unsigned long long)mft_zone_size);
682		if (zone == MFT_ZONE || mft_zone_size <= 0) {
683			ntfs_debug("No free clusters left, going to out.");
684			/* Really no more space left on device. */
685			err = -ENOSPC;
686			goto out;
687		} /* zone == DATA_ZONE && mft_zone_size > 0 */
688		ntfs_debug("Shrinking mft zone.");
689		zone_end = vol->mft_zone_end;
690		mft_zone_size >>= 1;
691		if (mft_zone_size > 0)
692			vol->mft_zone_end = vol->mft_zone_start + mft_zone_size;
693		else /* mft zone and data2 zone no longer exist. */
694			vol->data2_zone_pos = vol->mft_zone_start =
695					vol->mft_zone_end = 0;
696		if (vol->mft_zone_pos >= vol->mft_zone_end) {
697			vol->mft_zone_pos = vol->mft_lcn;
698			if (!vol->mft_zone_end)
699				vol->mft_zone_pos = 0;
700		}
701		bmp_pos = zone_start = bmp_initial_pos =
702				vol->data1_zone_pos = vol->mft_zone_end;
703		search_zone = 2;
704		pass = 2;
705		done_zones &= ~2;
706		ntfs_debug("After shrinking mft zone, mft_zone_size 0x%llx, "
707				"vol->mft_zone_start 0x%llx, "
708				"vol->mft_zone_end 0x%llx, "
709				"vol->mft_zone_pos 0x%llx, search_zone 2, "
710				"pass 2, dones_zones 0x%x, zone_start 0x%llx, "
711				"zone_end 0x%llx, vol->data1_zone_pos 0x%llx, "
712				"continuing outer while loop.",
713				(unsigned long long)mft_zone_size,
714				(unsigned long long)vol->mft_zone_start,
715				(unsigned long long)vol->mft_zone_end,
716				(unsigned long long)vol->mft_zone_pos,
717				done_zones, (unsigned long long)zone_start,
718				(unsigned long long)zone_end,
719				(unsigned long long)vol->data1_zone_pos);
720	}
721	ntfs_debug("After outer while loop.");
722out:
723	ntfs_debug("At out.");
724	/* Add runlist terminator element. */
725	if (likely(rl)) {
726		rl[rlpos].vcn = rl[rlpos - 1].vcn + rl[rlpos - 1].length;
727		rl[rlpos].lcn = is_extension ? LCN_ENOENT : LCN_RL_NOT_MAPPED;
728		rl[rlpos].length = 0;
729	}
730	if (likely(page && !IS_ERR(page))) {
731		if (need_writeback) {
732			ntfs_debug("Marking page dirty.");
733			flush_dcache_page(page);
734			set_page_dirty(page);
735			need_writeback = 0;
736		}
737		ntfs_unmap_page(page);
738	}
739	if (likely(!err)) {
740		up_write(&vol->lcnbmp_lock);
741		ntfs_debug("Done.");
742		return rl;
743	}
744	ntfs_error(vol->sb, "Failed to allocate clusters, aborting "
745			"(error %i).", err);
746	if (rl) {
747		int err2;
748
749		if (err == -ENOSPC)
750			ntfs_debug("Not enough space to complete allocation, "
751					"err -ENOSPC, first free lcn 0x%llx, "
752					"could allocate up to 0x%llx "
753					"clusters.",
754					(unsigned long long)rl[0].lcn,
755					(unsigned long long)(count - clusters));
756		/* Deallocate all allocated clusters. */
757		ntfs_debug("Attempting rollback...");
758		err2 = ntfs_cluster_free_from_rl_nolock(vol, rl);
759		if (err2) {
760			ntfs_error(vol->sb, "Failed to rollback (error %i).  "
761					"Leaving inconsistent metadata!  "
762					"Unmount and run chkdsk.", err2);
763			NVolSetErrors(vol);
764		}
765		/* Free the runlist. */
766		ntfs_free(rl);
767	} else if (err == -ENOSPC)
768		ntfs_debug("No space left at all, err = -ENOSPC, first free "
769				"lcn = 0x%llx.",
770				(long long)vol->data1_zone_pos);
771	up_write(&vol->lcnbmp_lock);
772	return ERR_PTR(err);
773}
774
775/**
776 * __ntfs_cluster_free - free clusters on an ntfs volume
777 * @ni:		ntfs inode whose runlist describes the clusters to free
778 * @start_vcn:	vcn in the runlist of @ni at which to start freeing clusters
779 * @count:	number of clusters to free or -1 for all clusters
780 * @ctx:	active attribute search context if present or NULL if not
781 * @is_rollback:	true if this is a rollback operation
782 *
783 * Free @count clusters starting at the cluster @start_vcn in the runlist
784 * described by the vfs inode @ni.
785 *
786 * If @count is -1, all clusters from @start_vcn to the end of the runlist are
787 * deallocated.  Thus, to completely free all clusters in a runlist, use
788 * @start_vcn = 0 and @count = -1.
789 *
790 * If @ctx is specified, it is an active search context of @ni and its base mft
791 * record.  This is needed when __ntfs_cluster_free() encounters unmapped
792 * runlist fragments and allows their mapping.  If you do not have the mft
793 * record mapped, you can specify @ctx as NULL and __ntfs_cluster_free() will
794 * perform the necessary mapping and unmapping.
795 *
796 * Note, __ntfs_cluster_free() saves the state of @ctx on entry and restores it
797 * before returning.  Thus, @ctx will be left pointing to the same attribute on
798 * return as on entry.  However, the actual pointers in @ctx may point to
799 * different memory locations on return, so you must remember to reset any
800 * cached pointers from the @ctx, i.e. after the call to __ntfs_cluster_free(),
801 * you will probably want to do:
802 *	m = ctx->mrec;
803 *	a = ctx->attr;
804 * Assuming you cache ctx->attr in a variable @a of type ATTR_RECORD * and that
805 * you cache ctx->mrec in a variable @m of type MFT_RECORD *.
806 *
807 * @is_rollback should always be 'false', it is for internal use to rollback
808 * errors.  You probably want to use ntfs_cluster_free() instead.
809 *
810 * Note, __ntfs_cluster_free() does not modify the runlist, so you have to
811 * remove from the runlist or mark sparse the freed runs later.
812 *
813 * Return the number of deallocated clusters (not counting sparse ones) on
814 * success and -errno on error.
815 *
816 * WARNING: If @ctx is supplied, regardless of whether success or failure is
817 *	    returned, you need to check IS_ERR(@ctx->mrec) and if 'true' the @ctx
818 *	    is no longer valid, i.e. you need to either call
819 *	    ntfs_attr_reinit_search_ctx() or ntfs_attr_put_search_ctx() on it.
820 *	    In that case PTR_ERR(@ctx->mrec) will give you the error code for
821 *	    why the mapping of the old inode failed.
822 *
823 * Locking: - The runlist described by @ni must be locked for writing on entry
824 *	      and is locked on return.  Note the runlist may be modified when
825 *	      needed runlist fragments need to be mapped.
826 *	    - The volume lcn bitmap must be unlocked on entry and is unlocked
827 *	      on return.
828 *	    - This function takes the volume lcn bitmap lock for writing and
829 *	      modifies the bitmap contents.
830 *	    - If @ctx is NULL, the base mft record of @ni must not be mapped on
831 *	      entry and it will be left unmapped on return.
832 *	    - If @ctx is not NULL, the base mft record must be mapped on entry
833 *	      and it will be left mapped on return.
834 */
835s64 __ntfs_cluster_free(ntfs_inode *ni, const VCN start_vcn, s64 count,
836		ntfs_attr_search_ctx *ctx, const bool is_rollback)
837{
838	s64 delta, to_free, total_freed, real_freed;
839	ntfs_volume *vol;
840	struct inode *lcnbmp_vi;
841	runlist_element *rl;
842	int err;
843
844	BUG_ON(!ni);
845	ntfs_debug("Entering for i_ino 0x%lx, start_vcn 0x%llx, count "
846			"0x%llx.%s", ni->mft_no, (unsigned long long)start_vcn,
847			(unsigned long long)count,
848			is_rollback ? " (rollback)" : "");
849	vol = ni->vol;
850	lcnbmp_vi = vol->lcnbmp_ino;
851	BUG_ON(!lcnbmp_vi);
852	BUG_ON(start_vcn < 0);
853	BUG_ON(count < -1);
854	/*
855	 * Lock the lcn bitmap for writing but only if not rolling back.  We
856	 * must hold the lock all the way including through rollback otherwise
857	 * rollback is not possible because once we have cleared a bit and
858	 * dropped the lock, anyone could have set the bit again, thus
859	 * allocating the cluster for another use.
860	 */
861	if (likely(!is_rollback))
862		down_write(&vol->lcnbmp_lock);
863
864	total_freed = real_freed = 0;
865
866	rl = ntfs_attr_find_vcn_nolock(ni, start_vcn, ctx);
867	if (IS_ERR(rl)) {
868		if (!is_rollback)
869			ntfs_error(vol->sb, "Failed to find first runlist "
870					"element (error %li), aborting.",
871					PTR_ERR(rl));
872		err = PTR_ERR(rl);
873		goto err_out;
874	}
875	if (unlikely(rl->lcn < LCN_HOLE)) {
876		if (!is_rollback)
877			ntfs_error(vol->sb, "First runlist element has "
878					"invalid lcn, aborting.");
879		err = -EIO;
880		goto err_out;
881	}
882	/* Find the starting cluster inside the run that needs freeing. */
883	delta = start_vcn - rl->vcn;
884
885	/* The number of clusters in this run that need freeing. */
886	to_free = rl->length - delta;
887	if (count >= 0 && to_free > count)
888		to_free = count;
889
890	if (likely(rl->lcn >= 0)) {
891		/* Do the actual freeing of the clusters in this run. */
892		err = ntfs_bitmap_set_bits_in_run(lcnbmp_vi, rl->lcn + delta,
893				to_free, likely(!is_rollback) ? 0 : 1);
894		if (unlikely(err)) {
895			if (!is_rollback)
896				ntfs_error(vol->sb, "Failed to clear first run "
897						"(error %i), aborting.", err);
898			goto err_out;
899		}
900		/* We have freed @to_free real clusters. */
901		real_freed = to_free;
902	};
903	/* Go to the next run and adjust the number of clusters left to free. */
904	++rl;
905	if (count >= 0)
906		count -= to_free;
907
908	/* Keep track of the total "freed" clusters, including sparse ones. */
909	total_freed = to_free;
910	/*
911	 * Loop over the remaining runs, using @count as a capping value, and
912	 * free them.
913	 */
914	for (; rl->length && count != 0; ++rl) {
915		if (unlikely(rl->lcn < LCN_HOLE)) {
916			VCN vcn;
917
918			/* Attempt to map runlist. */
919			vcn = rl->vcn;
920			rl = ntfs_attr_find_vcn_nolock(ni, vcn, ctx);
921			if (IS_ERR(rl)) {
922				err = PTR_ERR(rl);
923				if (!is_rollback)
924					ntfs_error(vol->sb, "Failed to map "
925							"runlist fragment or "
926							"failed to find "
927							"subsequent runlist "
928							"element.");
929				goto err_out;
930			}
931			if (unlikely(rl->lcn < LCN_HOLE)) {
932				if (!is_rollback)
933					ntfs_error(vol->sb, "Runlist element "
934							"has invalid lcn "
935							"(0x%llx).",
936							(unsigned long long)
937							rl->lcn);
938				err = -EIO;
939				goto err_out;
940			}
941		}
942		/* The number of clusters in this run that need freeing. */
943		to_free = rl->length;
944		if (count >= 0 && to_free > count)
945			to_free = count;
946
947		if (likely(rl->lcn >= 0)) {
948			/* Do the actual freeing of the clusters in the run. */
949			err = ntfs_bitmap_set_bits_in_run(lcnbmp_vi, rl->lcn,
950					to_free, likely(!is_rollback) ? 0 : 1);
951			if (unlikely(err)) {
952				if (!is_rollback)
953					ntfs_error(vol->sb, "Failed to clear "
954							"subsequent run.");
955				goto err_out;
956			}
957			/* We have freed @to_free real clusters. */
958			real_freed += to_free;
959		}
960		/* Adjust the number of clusters left to free. */
961		if (count >= 0)
962			count -= to_free;
963
964		/* Update the total done clusters. */
965		total_freed += to_free;
966	}
967	if (likely(!is_rollback))
968		up_write(&vol->lcnbmp_lock);
969
970	BUG_ON(count > 0);
971
972	/* We are done.  Return the number of actually freed clusters. */
973	ntfs_debug("Done.");
974	return real_freed;
975err_out:
976	if (is_rollback)
977		return err;
978	/* If no real clusters were freed, no need to rollback. */
979	if (!real_freed) {
980		up_write(&vol->lcnbmp_lock);
981		return err;
982	}
983	/*
984	 * Attempt to rollback and if that succeeds just return the error code.
985	 * If rollback fails, set the volume errors flag, emit an error
986	 * message, and return the error code.
987	 */
988	delta = __ntfs_cluster_free(ni, start_vcn, total_freed, ctx, true);
989	if (delta < 0) {
990		ntfs_error(vol->sb, "Failed to rollback (error %i).  Leaving "
991				"inconsistent metadata!  Unmount and run "
992				"chkdsk.", (int)delta);
993		NVolSetErrors(vol);
994	}
995	up_write(&vol->lcnbmp_lock);
996	ntfs_error(vol->sb, "Aborting (error %i).", err);
997	return err;
998}
999
1000#endif /* NTFS_RW */
1001