xref: /kernel/linux/linux-5.10/fs/orangefs/dir.c (revision 8c2ecf20)
1// SPDX-License-Identifier: GPL-2.0
2/*
3 * Copyright 2017 Omnibond Systems, L.L.C.
4 */
5
6#include "protocol.h"
7#include "orangefs-kernel.h"
8#include "orangefs-bufmap.h"
9
10struct orangefs_dir_part {
11	struct orangefs_dir_part *next;
12	size_t len;
13};
14
15struct orangefs_dir {
16	__u64 token;
17	struct orangefs_dir_part *part;
18	loff_t end;
19	int error;
20};
21
/*
 * A directory position is split in two: the low PART_SHIFT bits are the
 * byte offset inside a part and the bits above them are the part number.
 * PART_SIZE is the largest trailer accepted for a single part and
 * PART_MASK selects the part-number bits of a position.
 */
#define PART_SHIFT (24)
#define PART_SIZE (1 << PART_SHIFT)
#define PART_MASK (~(PART_SIZE - 1))
25
26/*
27 * There can be up to 512 directory entries.  Each entry is encoded as
28 * follows:
29 * 4 bytes: string size (n)
30 * n bytes: string
31 * 1 byte: trailing zero
32 * padding to 8 bytes
33 * 16 bytes: khandle
34 * padding to 8 bytes
35 *
36 * The trailer_buf starts with a struct orangefs_readdir_response_s
37 * which must be skipped to get to the directory data.
38 *
39 * The data which is received from the userspace daemon is termed a
40 * part and is stored in a linked list in case more than one part is
41 * needed for a large directory.
42 *
43 * The position pointer (ctx->pos) encodes the part and offset on which
44 * to begin reading at.  Bits above PART_SHIFT encode the part and bits
45 * below PART_SHIFT encode the offset.  Parts are stored in a linked
46 * list which grows as data is received from the server.  The overhead
47 * associated with managing the list is presumed to be small compared to
48 * the overhead of communicating with the server.
49 *
50 * As data is received from the server, it is placed at the end of the
51 * part list.  Data is parsed from the current position as it is needed.
52 * When data is determined to be corrupt, it is either because the
53 * userspace component has sent back corrupt data or because the file
54 * pointer has been moved to an invalid location.  Since the two cannot
55 * be differentiated, return EIO.
56 *
 * Part zero is synthesized to contain `.' and `..'.  Part one is the
 * first part of the part list.
59 */
60
61static int do_readdir(struct orangefs_inode_s *oi,
62    struct orangefs_dir *od, struct dentry *dentry,
63    struct orangefs_kernel_op_s *op)
64{
65	struct orangefs_readdir_response_s *resp;
66	int bufi, r;
67
68	/*
69	 * Despite the badly named field, readdir does not use shared
70	 * memory.  However, there are a limited number of readdir
71	 * slots, which must be allocated here.  This flag simply tells
72	 * the op scheduler to return the op here for retry.
73	 */
74	op->uses_shared_memory = 1;
75	op->upcall.req.readdir.refn = oi->refn;
76	op->upcall.req.readdir.token = od->token;
77	op->upcall.req.readdir.max_dirent_count =
78	    ORANGEFS_MAX_DIRENT_COUNT_READDIR;
79
80again:
81	bufi = orangefs_readdir_index_get();
82	if (bufi < 0) {
83		od->error = bufi;
84		return bufi;
85	}
86
87	op->upcall.req.readdir.buf_index = bufi;
88
89	r = service_operation(op, "orangefs_readdir",
90	    get_interruptible_flag(dentry->d_inode));
91
92	orangefs_readdir_index_put(bufi);
93
94	if (op_state_purged(op)) {
95		if (r == -EAGAIN) {
96			vfree(op->downcall.trailer_buf);
97			goto again;
98		} else if (r == -EIO) {
99			vfree(op->downcall.trailer_buf);
100			od->error = r;
101			return r;
102		}
103	}
104
105	if (r < 0) {
106		vfree(op->downcall.trailer_buf);
107		od->error = r;
108		return r;
109	} else if (op->downcall.status) {
110		vfree(op->downcall.trailer_buf);
111		od->error = op->downcall.status;
112		return op->downcall.status;
113	}
114
115	/*
116	 * The maximum size is size per entry times the 512 entries plus
117	 * the header.  This is well under the limit.
118	 */
119	if (op->downcall.trailer_size > PART_SIZE) {
120		vfree(op->downcall.trailer_buf);
121		od->error = -EIO;
122		return -EIO;
123	}
124
125	resp = (struct orangefs_readdir_response_s *)
126	    op->downcall.trailer_buf;
127	od->token = resp->token;
128	return 0;
129}
130
131static int parse_readdir(struct orangefs_dir *od,
132    struct orangefs_kernel_op_s *op)
133{
134	struct orangefs_dir_part *part, *new;
135	size_t count;
136
137	count = 1;
138	part = od->part;
139	while (part) {
140		count++;
141		if (part->next)
142			part = part->next;
143		else
144			break;
145	}
146
147	new = (void *)op->downcall.trailer_buf;
148	new->next = NULL;
149	new->len = op->downcall.trailer_size -
150	    sizeof(struct orangefs_readdir_response_s);
151	if (!od->part)
152		od->part = new;
153	else
154		part->next = new;
155	count++;
156	od->end = count << PART_SHIFT;
157
158	return 0;
159}
160
161static int orangefs_dir_more(struct orangefs_inode_s *oi,
162    struct orangefs_dir *od, struct dentry *dentry)
163{
164	struct orangefs_kernel_op_s *op;
165	int r;
166
167	op = op_alloc(ORANGEFS_VFS_OP_READDIR);
168	if (!op) {
169		od->error = -ENOMEM;
170		return -ENOMEM;
171	}
172	r = do_readdir(oi, od, dentry, op);
173	if (r) {
174		od->error = r;
175		goto out;
176	}
177	r = parse_readdir(od, op);
178	if (r) {
179		od->error = r;
180		goto out;
181	}
182
183	od->error = 0;
184out:
185	op_release(op);
186	return od->error;
187}
188
189static int fill_from_part(struct orangefs_dir_part *part,
190    struct dir_context *ctx)
191{
192	const int offset = sizeof(struct orangefs_readdir_response_s);
193	struct orangefs_khandle *khandle;
194	__u32 *len, padlen;
195	loff_t i;
196	char *s;
197	i = ctx->pos & ~PART_MASK;
198
199	/* The file offset from userspace is too large. */
200	if (i > part->len)
201		return 1;
202
203	/*
204	 * If the seek pointer is positioned just before an entry it
205	 * should find the next entry.
206	 */
207	if (i % 8)
208		i = i + (8 - i%8)%8;
209
210	while (i < part->len) {
211		if (part->len < i + sizeof *len)
212			break;
213		len = (void *)part + offset + i;
214		/*
215		 * len is the size of the string itself.  padlen is the
216		 * total size of the encoded string.
217		 */
218		padlen = (sizeof *len + *len + 1) +
219		    (8 - (sizeof *len + *len + 1)%8)%8;
220		if (part->len < i + padlen + sizeof *khandle)
221			goto next;
222		s = (void *)part + offset + i + sizeof *len;
223		if (s[*len] != 0)
224			goto next;
225		khandle = (void *)part + offset + i + padlen;
226		if (!dir_emit(ctx, s, *len,
227		    orangefs_khandle_to_ino(khandle),
228		    DT_UNKNOWN))
229			return 0;
230		i += padlen + sizeof *khandle;
231		i = i + (8 - i%8)%8;
232		BUG_ON(i > part->len);
233		ctx->pos = (ctx->pos & PART_MASK) | i;
234		continue;
235next:
236		i += 8;
237	}
238	return 1;
239}
240
241static int orangefs_dir_fill(struct orangefs_inode_s *oi,
242    struct orangefs_dir *od, struct dentry *dentry,
243    struct dir_context *ctx)
244{
245	struct orangefs_dir_part *part;
246	size_t count;
247
248	count = ((ctx->pos & PART_MASK) >> PART_SHIFT) - 1;
249
250	part = od->part;
251	while (part->next && count) {
252		count--;
253		part = part->next;
254	}
255	/* This means the userspace file offset is invalid. */
256	if (count) {
257		od->error = -EIO;
258		return -EIO;
259	}
260
261	while (part && part->len) {
262		int r;
263		r = fill_from_part(part, ctx);
264		if (r < 0) {
265			od->error = r;
266			return r;
267		} else if (r == 0) {
268			/* Userspace buffer is full. */
269			break;
270		} else {
271			/*
272			 * The part ran out of data.  Move to the next
273			 * part. */
274			ctx->pos = (ctx->pos & PART_MASK) +
275			    (1 << PART_SHIFT);
276			part = part->next;
277		}
278	}
279	return 0;
280}
281
282static loff_t orangefs_dir_llseek(struct file *file, loff_t offset,
283    int whence)
284{
285	struct orangefs_dir *od = file->private_data;
286	/*
287	 * Delete the stored data so userspace sees new directory
288	 * entries.
289	 */
290	if (!whence && offset < od->end) {
291		struct orangefs_dir_part *part = od->part;
292		while (part) {
293			struct orangefs_dir_part *next = part->next;
294			vfree(part);
295			part = next;
296		}
297		od->token = ORANGEFS_ITERATE_START;
298		od->part = NULL;
299		od->end = 1 << PART_SHIFT;
300	}
301	return default_llseek(file, offset, whence);
302}
303
304static int orangefs_dir_iterate(struct file *file,
305    struct dir_context *ctx)
306{
307	struct orangefs_inode_s *oi;
308	struct orangefs_dir *od;
309	struct dentry *dentry;
310	int r;
311
312	dentry = file->f_path.dentry;
313	oi = ORANGEFS_I(dentry->d_inode);
314	od = file->private_data;
315
316	if (od->error)
317		return od->error;
318
319	if (ctx->pos == 0) {
320		if (!dir_emit_dot(file, ctx))
321			return 0;
322		ctx->pos++;
323	}
324	if (ctx->pos == 1) {
325		if (!dir_emit_dotdot(file, ctx))
326			return 0;
327		ctx->pos = 1 << PART_SHIFT;
328	}
329
330	/*
331	 * The seek position is in the first synthesized part but is not
332	 * valid.
333	 */
334	if ((ctx->pos & PART_MASK) == 0)
335		return -EIO;
336
337	r = 0;
338
339	/*
340	 * Must read more if the user has sought past what has been read
341	 * so far.  Stop a user who has sought past the end.
342	 */
343	while (od->token != ORANGEFS_ITERATE_END &&
344	    ctx->pos > od->end) {
345		r = orangefs_dir_more(oi, od, dentry);
346		if (r)
347			return r;
348	}
349	if (od->token == ORANGEFS_ITERATE_END && ctx->pos > od->end)
350		return -EIO;
351
352	/* Then try to fill if there's any left in the buffer. */
353	if (ctx->pos < od->end) {
354		r = orangefs_dir_fill(oi, od, dentry, ctx);
355		if (r)
356			return r;
357	}
358
359	/* Finally get some more and try to fill. */
360	if (od->token != ORANGEFS_ITERATE_END) {
361		r = orangefs_dir_more(oi, od, dentry);
362		if (r)
363			return r;
364		r = orangefs_dir_fill(oi, od, dentry, ctx);
365	}
366
367	return r;
368}
369
370static int orangefs_dir_open(struct inode *inode, struct file *file)
371{
372	struct orangefs_dir *od;
373	file->private_data = kmalloc(sizeof(struct orangefs_dir),
374	    GFP_KERNEL);
375	if (!file->private_data)
376		return -ENOMEM;
377	od = file->private_data;
378	od->token = ORANGEFS_ITERATE_START;
379	od->part = NULL;
380	od->end = 1 << PART_SHIFT;
381	od->error = 0;
382	return 0;
383}
384
385static int orangefs_dir_release(struct inode *inode, struct file *file)
386{
387	struct orangefs_dir *od = file->private_data;
388	struct orangefs_dir_part *part = od->part;
389	while (part) {
390		struct orangefs_dir_part *next = part->next;
391		vfree(part);
392		part = next;
393	}
394	kfree(od);
395	return 0;
396}
397
398const struct file_operations orangefs_dir_operations = {
399	.llseek = orangefs_dir_llseek,
400	.read = generic_read_dir,
401	.iterate = orangefs_dir_iterate,
402	.open = orangefs_dir_open,
403	.release = orangefs_dir_release
404};
405