1/*
2 * Copyright 2000 by Hans Reiser, licensing governed by reiserfs/README
3 */
4
5#include <stdlib.h>
6#include <stdio.h>
7#include <string.h>
8#include <math.h>
9#include <sys/types.h>
10#include <unistd.h>
11#include <sys/stat.h>
12#include <fcntl.h>
13#include <errno.h>
14char tdir[256];
15char path[256];
16long stats = 0;
17
18void print_usage()
19{
20	printf("
21This program creates files in a tree of random depth and branching. Files vary
22in size randomly according to a distribution function which seems to model real
23file systems.  This distribution function has a median size of median_file_size
24(Median file size is hypothesized to be proportional to the average per file
25space wastage. Notice how that implies that with a more efficient file system
26file size usage patterns will in the long term move to a lower median file
27size), and a maximum size of max_file_size.  Directories vary in size according
28to the same distribution function but with separate parameters to control median
29and maximum size for the number of files within them, and the number of
30subdirectories within them.  This program prunes some empty subdirectories in a
31way that causes parents of leaf directories to branch less than
32median_dir_branching.
33
34 To avoid having one large file distort the results such that you have
35to benchmark many times set max_file_size to not more than
36bytes_to_consume/10.  If maximum/median is a small integer, then
37randomness is very poor.  This is a bug, Nikita, please find some
38clever way to fix it.  If it is 0, then the program crashes....
39
40For isolating performance consequences of design variations on
41particular file or directory size ranges, try setting their median size and
42max_size to both equal the max size of the file size range you want
43to test.
44
45To avoid having one large file distort the results set max_file_size to
46not more than bytes_to_consume/10.  Using a distribution function for
47the sizes of writes would be a natural next step in developing this program.\n\n");
48
49	printf
50	    ("Usage: reiser_fract_tree bytes_to_consume median_file_size max_file_size median_dir_nr_files max_directory_nr_files median_dir_branching max_dir_branching write_buffer_size /testfs_mount_point print_stats_flag\n\n");
51}
52
53/* #define DEBUG */
54
55char *write_buffer;		/* buffer from which we write */
56int write_buffer_size = 0;	/* gets reset to an argv  */
57static int already_whined = 0;	/* keep out of disk space errors from being
58				   endless by tracking whether we already
59				   printed the message */
60long bytes_to_consume = 0;	/* create files until their total number of
61				   bytes exceeds this number, but not by more
62				   than 1/10th */
63long byte_total = 0;		/* bytes created so far */
64
65/* statistics on sizes of files we attempted to create */
66int fsz_0_100 = 0;
67int fsz_100_1k = 0;
68int fsz_1k_10k = 0;
69int fsz_10k_100k = 0;
70int fsz_100k_1m = 0;
71int fsz_1m_10m = 0;
72int fsz_10m_larger = 0;
73
74void chngdir(char *name)
75{
76	int i;
77
78	if (name[0] == '.' && name[1] == '.') {
79		for (i = strlen(path); i > 0; i--) {
80			if (path[i] == '/') {
81				path[i] = 0;
82				break;
83			}
84		}
85	} else {
86		strcat(path, "/");
87		strcat(path, name);
88	}
89}
90
91/* this is the core statistical distribution function, and it is used for file
92   sizes, directory sizes, etc. */
93int determine_size(double median_size,
94		   double max_size /* The maximal value of size */ )
95{
96	/* when x is half of its random range (max_size/median_size), result is
97	   median_size */
98	int nr_random, granularity_reducer;
99	double size, double_nr_random;
100
101	/* it is a feature for us that this repeats identically every time it is run,
102	   as otherwise meaningless variances would affect our results and require us
103	   to use a higher number of benchmarks to achieve low noise results.  */
104	nr_random = rand();
105	median_size++;		/* avoids divide by zero errors */
106
107	/* this code does poorly when max_size is not a lot more than median size,
108	   and that needs fixing */
109
110	/* THE NEXT 2 LINES ARE THE HEART OF THE PROGRAM */
111
112	/* keep x below the value that when multiplied by median size on the next
113	   line will equal max_size */
114	/* the granularity_reducer is to handle the case where max_size is near
115	   median_size, since '%' can only take ints, we need this complicated what
116	   of handling that for small values of max_size/median_size by making
117	   large ints out of small ints temporarily.  */
118	if (max_size / median_size < 1024)
119		granularity_reducer = 1024 * 1024;
120	else
121		granularity_reducer = 1;
122	nr_random =
123	    nr_random %
124	    ((int)
125	     (granularity_reducer *
126	      (((double)max_size) / ((double)median_size))));
127	double_nr_random = ((double)nr_random) / (granularity_reducer);
128	size =
129	    median_size * (1 /
130			   (1 -
131			    (double_nr_random) / (((double)max_size) /
132						  ((double)median_size))) - 1);
133	return ((int)size);
134}
135
136/* generate a unique filename */
137void get_name_by_number(long this_files_number, char *str)
138{
139	sprintf(str, "%lu", this_files_number);
140}
141
142/* make a file of a specified size */
143void make_file(int size)
144{
145	char string[128] = { 0 };
146	char *str = string;
147	char fname[256];
148	int fd = 0;
149	int error;
150	static long this_files_number = 1;
151
152	/* collect statistics about the size of files created, or more precisely, the
153	   size of files that we will attempt to create. */
154	if (size <= 100)
155		fsz_0_100++;
156	else if (size <= 1000)
157		fsz_100_1k++;
158	else if (size <= 10 * 1000)
159		fsz_1k_10k++;
160	else if (size <= 100 * 1000)
161		fsz_10k_100k++;
162	else if (size <= 1000 * 1000)
163		fsz_100k_1m++;
164	else if (size <= 10 * 1000 * 1000)
165		fsz_1m_10m++;
166	else
167		fsz_10m_larger++;
168
169	/* construct a name for the file */
170	get_name_by_number(this_files_number++, str);
171	strcpy(fname, path);
172	strcat(fname, "/");
173	strcat(fname, str);
174
175	/* open the file, and deal with the various errors that can occur */
176
177	if ((fd = open(fname, O_CREAT | O_EXCL | O_RDWR, 0777)) == -1) {
178		if (errno == ENOSPC) {
179			if (!already_whined) {
180				printf
181				    ("reiser-2021A: out of disk (or inodes) space, will keep trying\n");
182				already_whined = 1;	/* we continue other file creation in out of
183							   space conditions */
184			}
185			return;
186		}
187		/*  it is sometimes useful to be able to run this program more than once
188		   inside the same directory, and that means skipping over filenames that
189		   already exist.  Thus we ignore EEXIST, and pay attention to all
190		   else. */
191		if (errno == EEXIST) {	/* just skip existing file */
192			return;
193		}
194		perror("open");
195		exit(errno);
196	}
197	/* write to the file until it is the right size, handling the various error
198	   conditions appropriately */
199
200	while (size > 0) {
201		size -= (error =
202			 write(fd, write_buffer,
203			       (size <
204				write_buffer_size -
205				1) ? size : (write_buffer_size - 1)));
206		if (error == -1) {
207			if (errno == ENOSPC) {
208				if (!already_whined) {
209					printf
210					    ("reiser-2022: out of disk space, will keep trying\n");
211					already_whined = 1;
212				}
213				close(fd);
214				return;
215			}
216			perror("write() failed");
217			exit(errno);
218		}
219	}
220
221	/* close the file */
222	if (close(fd)) {
223		perror("close() failed");
224		exit(errno);
225	}
226}
227
228/* print the statistics on how many files were created of what size */
229
230void print_stats()
231{
232	if (!stats)
233		return;
234
235	printf("\n");
236	printf("File stats: Units are decimal (1k = 1000)\n");
237	printf("files 0-100    : %i\n", fsz_0_100);
238	printf("files 100-1K   : %i\n", fsz_100_1k);
239	printf("files 1K-10K   : %i\n", fsz_1k_10k);
240	printf("files 10K-100K : %i\n", fsz_10k_100k);
241	printf("files 100K-1M  : %i\n", fsz_100k_1m);
242	printf("files 1M-10M    : %i\n", fsz_1m_10m);
243	printf("files 10M-larger    : %i\n", fsz_10m_larger);
244	printf("total bytes written    : %lu\n", byte_total);
245
246}
247
248/* predict the number of files that will be created before max_bytes total
249   length of files is reached */
250long determine_nr_of_files(int median_file_size, double max_file_size,
251			   long bytes_to_consume)
252{
253	long nr_of_files = 0, byte_total = 0;
254
255	/* the next line is not necessary as 1 is the default, it is just cautious
256	   coding */
257	srand(1);
258	while (byte_total < bytes_to_consume) {
259		byte_total += determine_size(median_file_size, max_file_size);
260		nr_of_files++;
261	}
262	/* reset the random number generator so that when we determine_size() of the
263	   files later they will be created with the same "random" sequence used in
264	   this calculation */
265	srand(1);
266#ifdef DEBUG
267	printf("number of files is %d\n", (int)nr_of_files);
268#endif /* DEBUG */
269	fflush(NULL);
270	return nr_of_files;
271}
272
273/* fill the current working directory with nr_files_this_directory number of
274   files*/
275
276void fill_this_directory(long nr_files_this_directory, long median_file_size,
277			 long maximum_size)
278{
279	long size;
280
281#ifdef DEBUG
282	printf("filling with %lu files, ", nr_files_this_directory);
283#endif
284	while (nr_files_this_directory--) {
285		size = determine_size(median_file_size, maximum_size);
286		byte_total += size;
287		make_file(size);
288	}
289}
290
291/* this will unfortunately handle out of disk space by forever trying */
292/* What we should do in out of space situaltion ? I think we must skip this
293   directory and continue files/dirs creation process. Error value (!= 0)
294   indicates that we can't go to this directory. -zam */
295int make_directory(char *dirname)
296{
297	static long this_directory_number = 0;
298
299	strcpy(tdir, path);
300	strcat(tdir, "/");
301	strcat(tdir, dirname);
302
303	if (mkdir(tdir, 0755) == -1) {
304		if (errno == ENOSPC) {
305			if (!already_whined) {
306				printf("reiser-2021: out of disk space, ");
307				already_whined = 1;
308			}
309			return errno;
310		}
311		/*  it is sometimes useful to be able to run this program more than once
312		   inside the same directory, and that means skipping over filenames that
313		   already exist.  Thus we ignore EEXIST, and pay attention to all else. */
314		if (errno != EEXIST) {
315			perror("mkdir");
316			exit(errno);
317		}
318	}
319	sprintf(dirname, "d%lu", this_directory_number++);
320	strcpy(tdir, path);
321	strcat(tdir, "/");
322	strcat(tdir, dirname);
323
324	return 0;
325}
326
327/* assumes we are already chdir'd into a directory that the subtree is rooted
328   at.  Fills the directory with files and subdirectories, cd's into those
329   subdirectories, and recurses upon itself */
330
331void do_subtree(
332		       /* the start and end of the portion of the directory sizes
333		          array which corresponds to the sizes of the directories
334		          composing this subtree */
335		       /* sizes_end minus sizes_start is equal to the number of
336		          directories in this subtree */
337		       long *sizes_start, long *sizes_end,
338		       long median_file_size, long maximum_file_size,
339		       long median_dir_branching, long max_dir_branching)
340{
341	long *p;
342	long *sub_start;
343	long *sub_end;
344	int index_subdirectory_to_add_directory_to;
345	long *dirs_in_subtrees;
346	char *subtree_name;
347	long *sizes_index = sizes_start;
348	char subtree_name_array[128];
349	long this_directory_branching;
350	static long this_directorys_number;
351
352	subtree_name = subtree_name_array;
353	/* fill this directory with its number of files */
354	fill_this_directory(*sizes_index, median_file_size, maximum_file_size);
355	sizes_index++;
356	/* ok, now randomly assign directories (and their number of files) among the
357	   subdirectories that will be created if at least one directory is assigned
358	   to it */
359
360	/* this will cause the random number sequence to not match the one used in
361	   determine_nr_files() I need to accumulate my values in an array
362	   beforehand. I'll code that later.  */
363	/* worry about whether 0 or 1 is a problem value */
364	this_directory_branching =
365	    determine_size(median_dir_branching, max_dir_branching) + 1;
366
367	/* create an array holding the number of directories assigned to each
368	   potential subdirectory */
369	dirs_in_subtrees = calloc(this_directory_branching, sizeof(long));
370	while (sizes_index <= sizes_end) {
371		index_subdirectory_to_add_directory_to =
372		    (rand() % this_directory_branching);
373		(*
374		 (dirs_in_subtrees + index_subdirectory_to_add_directory_to))++;
375		sizes_index++;
376	}
377	/* the +1 is for the fill_directory() we did above */
378	sizes_index = sizes_start + 1;
379
380	/* go through each potential subdirectory, and if at least one directory has
381	   been assigned to it, create it and recurse */
382	for (p = dirs_in_subtrees;
383	     p < (dirs_in_subtrees + this_directory_branching); p++) {
384		if (*p) {
385			int nocd;
386			sprintf(subtree_name, "d%lu", this_directorys_number++);
387			nocd = make_directory(subtree_name);
388			/* if make_dir.. may fails (in out of space situation), we continue
389			   creation process in same dir */
390			if (!nocd)
391				chngdir(subtree_name);
392			sub_start = sizes_index;
393			/* the minus one is because *p is the number of elements and arrays start at 0 */
394			sub_end = (sizes_index + (*p - 1));
395
396#ifdef DEBUG
397			/* comment this back in if the array logic has you going cross-eyed */
398			/*      printf ("sizes_start is %p, sizes_index is %p, sizes_index+p is %p, sizes_end is %p\n", sizes_start, sub_start, sub_end, sizes_end); */
399#endif
400			do_subtree(sub_start, sub_end, median_file_size,
401				   maximum_file_size, median_dir_branching,
402				   max_dir_branching);
403			if (!nocd)
404				chngdir("..");
405		}
406		sizes_index += *p;
407	}
408}
409
410/* We have already determined that nr_files can fit in bytes_to_consume space.
411   Fill the sizes array with the number of files to be in each directory, and
412   then call do_subtree to fill the tree with files and directories.  */
413
414void make_fractal_tree(long median_file_size, long maximum_file_size,
415		       long median_dir_nr_files, long max_dir_nr_files,
416		       long median_dir_branching, long max_dir_branching,
417		       long nr_files)
418{
419	long *sizes_start;
420	long *sizes_end;
421	long *sizes_index;
422	long remaining_files = nr_files;
423
424	/* collect together array of directory sizes for whole filesystem.  This
425	   cannot easily be done recursively without distorting the directory sizes
426	   and making deeper directories smaller.  Send me the code if you
427	   disagree.:-) */
428	/* we almost certainly don't need this much space, but so what.... */
429	sizes_index = sizes_start = malloc(nr_files * sizeof(long));
430	for (; remaining_files > 0;) {
431		*sizes_index =
432		    determine_size(median_dir_nr_files, max_dir_nr_files);
433		// we alloc space for nr_files, so we should avoid
434		// number of files in directory = 0 -grev.
435		if (*sizes_index == 0)
436			*sizes_index = 1;
437		*sizes_index =
438		    (*sizes_index <
439		     remaining_files) ? *sizes_index : remaining_files;
440
441#ifdef DEBUG
442		printf("*sizes_index == %lu, ", *sizes_index);
443#endif
444		remaining_files -= *sizes_index;
445		sizes_index++;
446	}
447	/* don't decrement below sizes_start if nr_files is 0 */
448	sizes_end = (sizes_index-- > sizes_start) ? sizes_index : sizes_start;
449
450	sizes_index = sizes_start;
451	srand(1);
452	do_subtree(sizes_start, sizes_end, median_file_size, maximum_file_size,
453		   median_dir_branching, max_dir_branching);
454
455}
456
457int main(int argc, char *argv[])
458{
459	/* initialized from argv[] */
460	long median_file_size,
461	    median_dir_branching,
462	    median_dir_nr_files,
463	    max_dir_nr_files, max_dir_branching, max_file_size;
464	long nr_of_files = 0;	/* files to be created */
465
466	if (argc != 11) {
467		print_usage();
468		exit(1);
469	}
470
471	write_buffer_size = atoi(argv[8]);
472	write_buffer = malloc(write_buffer_size);
473	memset(write_buffer, 'a', write_buffer_size);
474
475	/* the number of bytes that we desire this tree to consume.  It will actually
476	   consume more, because the last file will overshoot by a random amount, and
477	   because the directories and metadata will consume space.  */
478	bytes_to_consume = atol(argv[1]);
479	max_file_size = atol(argv[3]);
480	median_file_size = atol(argv[2]);
481	/* Figure out how many random files will fit into bytes_to_consume bytes. We
482	   depend on resetting rand() to get the same result later. */
483	nr_of_files =
484	    determine_nr_of_files(median_file_size, max_file_size,
485				  bytes_to_consume);
486
487	strcpy(path, argv[9]);
488	mkdir(path, 0755);
489	stats = atol(argv[10]);
490	median_dir_branching = atol(argv[6]);
491	max_dir_branching = atol(argv[7]);
492	median_dir_nr_files = atol(argv[4]);
493	max_dir_nr_files = atol(argv[5]);
494	make_fractal_tree(median_file_size, max_file_size, median_dir_nr_files,
495			  max_dir_nr_files, median_dir_branching,
496			  max_dir_branching, nr_of_files);
497	print_stats();
498	if (stats)
499		printf("\nreiser_fract_tree finished\n");
500
501	return 0;
502}
503