xref: /kernel/linux/linux-6.6/fs/fuse/readdir.c (revision 62306a36)
1/*
2  FUSE: Filesystem in Userspace
3  Copyright (C) 2001-2018  Miklos Szeredi <miklos@szeredi.hu>
4
5  This program can be distributed under the terms of the GNU GPL.
6  See the file COPYING.
7*/
8
9
10#include "fuse_i.h"
11#include <linux/iversion.h>
12#include <linux/posix_acl.h>
13#include <linux/pagemap.h>
14#include <linux/highmem.h>
15
16static bool fuse_use_readdirplus(struct inode *dir, struct dir_context *ctx)
17{
18	struct fuse_conn *fc = get_fuse_conn(dir);
19	struct fuse_inode *fi = get_fuse_inode(dir);
20
21	if (!fc->do_readdirplus)
22		return false;
23	if (!fc->readdirplus_auto)
24		return true;
25	if (test_and_clear_bit(FUSE_I_ADVISE_RDPLUS, &fi->state))
26		return true;
27	if (ctx->pos == 0)
28		return true;
29	return false;
30}
31
32static void fuse_add_dirent_to_cache(struct file *file,
33				     struct fuse_dirent *dirent, loff_t pos)
34{
35	struct fuse_inode *fi = get_fuse_inode(file_inode(file));
36	size_t reclen = FUSE_DIRENT_SIZE(dirent);
37	pgoff_t index;
38	struct page *page;
39	loff_t size;
40	u64 version;
41	unsigned int offset;
42	void *addr;
43
44	spin_lock(&fi->rdc.lock);
45	/*
46	 * Is cache already completed?  Or this entry does not go at the end of
47	 * cache?
48	 */
49	if (fi->rdc.cached || pos != fi->rdc.pos) {
50		spin_unlock(&fi->rdc.lock);
51		return;
52	}
53	version = fi->rdc.version;
54	size = fi->rdc.size;
55	offset = size & ~PAGE_MASK;
56	index = size >> PAGE_SHIFT;
57	/* Dirent doesn't fit in current page?  Jump to next page. */
58	if (offset + reclen > PAGE_SIZE) {
59		index++;
60		offset = 0;
61	}
62	spin_unlock(&fi->rdc.lock);
63
64	if (offset) {
65		page = find_lock_page(file->f_mapping, index);
66	} else {
67		page = find_or_create_page(file->f_mapping, index,
68					   mapping_gfp_mask(file->f_mapping));
69	}
70	if (!page)
71		return;
72
73	spin_lock(&fi->rdc.lock);
74	/* Raced with another readdir */
75	if (fi->rdc.version != version || fi->rdc.size != size ||
76	    WARN_ON(fi->rdc.pos != pos))
77		goto unlock;
78
79	addr = kmap_local_page(page);
80	if (!offset) {
81		clear_page(addr);
82		SetPageUptodate(page);
83	}
84	memcpy(addr + offset, dirent, reclen);
85	kunmap_local(addr);
86	fi->rdc.size = (index << PAGE_SHIFT) + offset + reclen;
87	fi->rdc.pos = dirent->off;
88unlock:
89	spin_unlock(&fi->rdc.lock);
90	unlock_page(page);
91	put_page(page);
92}
93
94static void fuse_readdir_cache_end(struct file *file, loff_t pos)
95{
96	struct fuse_inode *fi = get_fuse_inode(file_inode(file));
97	loff_t end;
98
99	spin_lock(&fi->rdc.lock);
100	/* does cache end position match current position? */
101	if (fi->rdc.pos != pos) {
102		spin_unlock(&fi->rdc.lock);
103		return;
104	}
105
106	fi->rdc.cached = true;
107	end = ALIGN(fi->rdc.size, PAGE_SIZE);
108	spin_unlock(&fi->rdc.lock);
109
110	/* truncate unused tail of cache */
111	truncate_inode_pages(file->f_mapping, end);
112}
113
114static bool fuse_emit(struct file *file, struct dir_context *ctx,
115		      struct fuse_dirent *dirent)
116{
117	struct fuse_file *ff = file->private_data;
118
119	if (ff->open_flags & FOPEN_CACHE_DIR)
120		fuse_add_dirent_to_cache(file, dirent, ctx->pos);
121
122	return dir_emit(ctx, dirent->name, dirent->namelen, dirent->ino,
123			dirent->type);
124}
125
126static int parse_dirfile(char *buf, size_t nbytes, struct file *file,
127			 struct dir_context *ctx)
128{
129	while (nbytes >= FUSE_NAME_OFFSET) {
130		struct fuse_dirent *dirent = (struct fuse_dirent *) buf;
131		size_t reclen = FUSE_DIRENT_SIZE(dirent);
132		if (!dirent->namelen || dirent->namelen > FUSE_NAME_MAX)
133			return -EIO;
134		if (reclen > nbytes)
135			break;
136		if (memchr(dirent->name, '/', dirent->namelen) != NULL)
137			return -EIO;
138
139		if (!fuse_emit(file, ctx, dirent))
140			break;
141
142		buf += reclen;
143		nbytes -= reclen;
144		ctx->pos = dirent->off;
145	}
146
147	return 0;
148}
149
150static int fuse_direntplus_link(struct file *file,
151				struct fuse_direntplus *direntplus,
152				u64 attr_version)
153{
154	struct fuse_entry_out *o = &direntplus->entry_out;
155	struct fuse_dirent *dirent = &direntplus->dirent;
156	struct dentry *parent = file->f_path.dentry;
157	struct qstr name = QSTR_INIT(dirent->name, dirent->namelen);
158	struct dentry *dentry;
159	struct dentry *alias;
160	struct inode *dir = d_inode(parent);
161	struct fuse_conn *fc;
162	struct inode *inode;
163	DECLARE_WAIT_QUEUE_HEAD_ONSTACK(wq);
164
165	if (!o->nodeid) {
166		/*
167		 * Unlike in the case of fuse_lookup, zero nodeid does not mean
168		 * ENOENT. Instead, it only means the userspace filesystem did
169		 * not want to return attributes/handle for this entry.
170		 *
171		 * So do nothing.
172		 */
173		return 0;
174	}
175
176	if (name.name[0] == '.') {
177		/*
178		 * We could potentially refresh the attributes of the directory
179		 * and its parent?
180		 */
181		if (name.len == 1)
182			return 0;
183		if (name.name[1] == '.' && name.len == 2)
184			return 0;
185	}
186
187	if (invalid_nodeid(o->nodeid))
188		return -EIO;
189	if (fuse_invalid_attr(&o->attr))
190		return -EIO;
191
192	fc = get_fuse_conn(dir);
193
194	name.hash = full_name_hash(parent, name.name, name.len);
195	dentry = d_lookup(parent, &name);
196	if (!dentry) {
197retry:
198		dentry = d_alloc_parallel(parent, &name, &wq);
199		if (IS_ERR(dentry))
200			return PTR_ERR(dentry);
201	}
202	if (!d_in_lookup(dentry)) {
203		struct fuse_inode *fi;
204		inode = d_inode(dentry);
205		if (inode && get_node_id(inode) != o->nodeid)
206			inode = NULL;
207		if (!inode ||
208		    fuse_stale_inode(inode, o->generation, &o->attr)) {
209			if (inode)
210				fuse_make_bad(inode);
211			d_invalidate(dentry);
212			dput(dentry);
213			goto retry;
214		}
215		if (fuse_is_bad(inode)) {
216			dput(dentry);
217			return -EIO;
218		}
219
220		fi = get_fuse_inode(inode);
221		spin_lock(&fi->lock);
222		fi->nlookup++;
223		spin_unlock(&fi->lock);
224
225		forget_all_cached_acls(inode);
226		fuse_change_attributes(inode, &o->attr, NULL,
227				       ATTR_TIMEOUT(o),
228				       attr_version);
229		/*
230		 * The other branch comes via fuse_iget()
231		 * which bumps nlookup inside
232		 */
233	} else {
234		inode = fuse_iget(dir->i_sb, o->nodeid, o->generation,
235				  &o->attr, ATTR_TIMEOUT(o),
236				  attr_version);
237		if (!inode)
238			inode = ERR_PTR(-ENOMEM);
239
240		alias = d_splice_alias(inode, dentry);
241		d_lookup_done(dentry);
242		if (alias) {
243			dput(dentry);
244			dentry = alias;
245		}
246		if (IS_ERR(dentry)) {
247			if (!IS_ERR(inode)) {
248				struct fuse_inode *fi = get_fuse_inode(inode);
249
250				spin_lock(&fi->lock);
251				fi->nlookup--;
252				spin_unlock(&fi->lock);
253			}
254			return PTR_ERR(dentry);
255		}
256	}
257	if (fc->readdirplus_auto)
258		set_bit(FUSE_I_INIT_RDPLUS, &get_fuse_inode(inode)->state);
259	fuse_change_entry_timeout(dentry, o);
260
261	dput(dentry);
262	return 0;
263}
264
265static void fuse_force_forget(struct file *file, u64 nodeid)
266{
267	struct inode *inode = file_inode(file);
268	struct fuse_mount *fm = get_fuse_mount(inode);
269	struct fuse_forget_in inarg;
270	FUSE_ARGS(args);
271
272	memset(&inarg, 0, sizeof(inarg));
273	inarg.nlookup = 1;
274	args.opcode = FUSE_FORGET;
275	args.nodeid = nodeid;
276	args.in_numargs = 1;
277	args.in_args[0].size = sizeof(inarg);
278	args.in_args[0].value = &inarg;
279	args.force = true;
280	args.noreply = true;
281
282	fuse_simple_request(fm, &args);
283	/* ignore errors */
284}
285
286static int parse_dirplusfile(char *buf, size_t nbytes, struct file *file,
287			     struct dir_context *ctx, u64 attr_version)
288{
289	struct fuse_direntplus *direntplus;
290	struct fuse_dirent *dirent;
291	size_t reclen;
292	int over = 0;
293	int ret;
294
295	while (nbytes >= FUSE_NAME_OFFSET_DIRENTPLUS) {
296		direntplus = (struct fuse_direntplus *) buf;
297		dirent = &direntplus->dirent;
298		reclen = FUSE_DIRENTPLUS_SIZE(direntplus);
299
300		if (!dirent->namelen || dirent->namelen > FUSE_NAME_MAX)
301			return -EIO;
302		if (reclen > nbytes)
303			break;
304		if (memchr(dirent->name, '/', dirent->namelen) != NULL)
305			return -EIO;
306
307		if (!over) {
308			/* We fill entries into dstbuf only as much as
309			   it can hold. But we still continue iterating
310			   over remaining entries to link them. If not,
311			   we need to send a FORGET for each of those
312			   which we did not link.
313			*/
314			over = !fuse_emit(file, ctx, dirent);
315			if (!over)
316				ctx->pos = dirent->off;
317		}
318
319		buf += reclen;
320		nbytes -= reclen;
321
322		ret = fuse_direntplus_link(file, direntplus, attr_version);
323		if (ret)
324			fuse_force_forget(file, direntplus->entry_out.nodeid);
325	}
326
327	return 0;
328}
329
330static int fuse_readdir_uncached(struct file *file, struct dir_context *ctx)
331{
332	int plus;
333	ssize_t res;
334	struct page *page;
335	struct inode *inode = file_inode(file);
336	struct fuse_mount *fm = get_fuse_mount(inode);
337	struct fuse_io_args ia = {};
338	struct fuse_args_pages *ap = &ia.ap;
339	struct fuse_page_desc desc = { .length = PAGE_SIZE };
340	u64 attr_version = 0;
341	bool locked;
342
343	page = alloc_page(GFP_KERNEL);
344	if (!page)
345		return -ENOMEM;
346
347	plus = fuse_use_readdirplus(inode, ctx);
348	ap->args.out_pages = true;
349	ap->num_pages = 1;
350	ap->pages = &page;
351	ap->descs = &desc;
352	if (plus) {
353		attr_version = fuse_get_attr_version(fm->fc);
354		fuse_read_args_fill(&ia, file, ctx->pos, PAGE_SIZE,
355				    FUSE_READDIRPLUS);
356	} else {
357		fuse_read_args_fill(&ia, file, ctx->pos, PAGE_SIZE,
358				    FUSE_READDIR);
359	}
360	locked = fuse_lock_inode(inode);
361	res = fuse_simple_request(fm, &ap->args);
362	fuse_unlock_inode(inode, locked);
363	if (res >= 0) {
364		if (!res) {
365			struct fuse_file *ff = file->private_data;
366
367			if (ff->open_flags & FOPEN_CACHE_DIR)
368				fuse_readdir_cache_end(file, ctx->pos);
369		} else if (plus) {
370			res = parse_dirplusfile(page_address(page), res,
371						file, ctx, attr_version);
372		} else {
373			res = parse_dirfile(page_address(page), res, file,
374					    ctx);
375		}
376	}
377
378	__free_page(page);
379	fuse_invalidate_atime(inode);
380	return res;
381}
382
/* Outcome of scanning one page of the readdir cache (fuse_parse_cache()). */
enum fuse_parse_result {
	FOUND_ERR  = -1,	/* a cached entry failed validation */
	FOUND_NONE = 0,		/* no entry at the wanted position was seen */
	FOUND_SOME = 1,		/* at least one entry matched the position */
	FOUND_ALL  = 2,		/* dir_emit() refused an entry; stop reading */
};
389
390static enum fuse_parse_result fuse_parse_cache(struct fuse_file *ff,
391					       void *addr, unsigned int size,
392					       struct dir_context *ctx)
393{
394	unsigned int offset = ff->readdir.cache_off & ~PAGE_MASK;
395	enum fuse_parse_result res = FOUND_NONE;
396
397	WARN_ON(offset >= size);
398
399	for (;;) {
400		struct fuse_dirent *dirent = addr + offset;
401		unsigned int nbytes = size - offset;
402		size_t reclen;
403
404		if (nbytes < FUSE_NAME_OFFSET || !dirent->namelen)
405			break;
406
407		reclen = FUSE_DIRENT_SIZE(dirent); /* derefs ->namelen */
408
409		if (WARN_ON(dirent->namelen > FUSE_NAME_MAX))
410			return FOUND_ERR;
411		if (WARN_ON(reclen > nbytes))
412			return FOUND_ERR;
413		if (WARN_ON(memchr(dirent->name, '/', dirent->namelen) != NULL))
414			return FOUND_ERR;
415
416		if (ff->readdir.pos == ctx->pos) {
417			res = FOUND_SOME;
418			if (!dir_emit(ctx, dirent->name, dirent->namelen,
419				      dirent->ino, dirent->type))
420				return FOUND_ALL;
421			ctx->pos = dirent->off;
422		}
423		ff->readdir.pos = dirent->off;
424		ff->readdir.cache_off += reclen;
425
426		offset += reclen;
427	}
428
429	return res;
430}
431
432static void fuse_rdc_reset(struct inode *inode)
433{
434	struct fuse_inode *fi = get_fuse_inode(inode);
435
436	fi->rdc.cached = false;
437	fi->rdc.version++;
438	fi->rdc.size = 0;
439	fi->rdc.pos = 0;
440}
441
442#define UNCACHED 1
443
444static int fuse_readdir_cached(struct file *file, struct dir_context *ctx)
445{
446	struct fuse_file *ff = file->private_data;
447	struct inode *inode = file_inode(file);
448	struct fuse_conn *fc = get_fuse_conn(inode);
449	struct fuse_inode *fi = get_fuse_inode(inode);
450	enum fuse_parse_result res;
451	pgoff_t index;
452	unsigned int size;
453	struct page *page;
454	void *addr;
455
456	/* Seeked?  If so, reset the cache stream */
457	if (ff->readdir.pos != ctx->pos) {
458		ff->readdir.pos = 0;
459		ff->readdir.cache_off = 0;
460	}
461
462	/*
463	 * We're just about to start reading into the cache or reading the
464	 * cache; both cases require an up-to-date mtime value.
465	 */
466	if (!ctx->pos && fc->auto_inval_data) {
467		int err = fuse_update_attributes(inode, file, STATX_MTIME);
468
469		if (err)
470			return err;
471	}
472
473retry:
474	spin_lock(&fi->rdc.lock);
475retry_locked:
476	if (!fi->rdc.cached) {
477		/* Starting cache? Set cache mtime. */
478		if (!ctx->pos && !fi->rdc.size) {
479			fi->rdc.mtime = inode->i_mtime;
480			fi->rdc.iversion = inode_query_iversion(inode);
481		}
482		spin_unlock(&fi->rdc.lock);
483		return UNCACHED;
484	}
485	/*
486	 * When at the beginning of the directory (i.e. just after opendir(3) or
487	 * rewinddir(3)), then need to check whether directory contents have
488	 * changed, and reset the cache if so.
489	 */
490	if (!ctx->pos) {
491		if (inode_peek_iversion(inode) != fi->rdc.iversion ||
492		    !timespec64_equal(&fi->rdc.mtime, &inode->i_mtime)) {
493			fuse_rdc_reset(inode);
494			goto retry_locked;
495		}
496	}
497
498	/*
499	 * If cache version changed since the last getdents() call, then reset
500	 * the cache stream.
501	 */
502	if (ff->readdir.version != fi->rdc.version) {
503		ff->readdir.pos = 0;
504		ff->readdir.cache_off = 0;
505	}
506	/*
507	 * If at the beginning of the cache, than reset version to
508	 * current.
509	 */
510	if (ff->readdir.pos == 0)
511		ff->readdir.version = fi->rdc.version;
512
513	WARN_ON(fi->rdc.size < ff->readdir.cache_off);
514
515	index = ff->readdir.cache_off >> PAGE_SHIFT;
516
517	if (index == (fi->rdc.size >> PAGE_SHIFT))
518		size = fi->rdc.size & ~PAGE_MASK;
519	else
520		size = PAGE_SIZE;
521	spin_unlock(&fi->rdc.lock);
522
523	/* EOF? */
524	if ((ff->readdir.cache_off & ~PAGE_MASK) == size)
525		return 0;
526
527	page = find_get_page_flags(file->f_mapping, index,
528				   FGP_ACCESSED | FGP_LOCK);
529	/* Page gone missing, then re-added to cache, but not initialized? */
530	if (page && !PageUptodate(page)) {
531		unlock_page(page);
532		put_page(page);
533		page = NULL;
534	}
535	spin_lock(&fi->rdc.lock);
536	if (!page) {
537		/*
538		 * Uh-oh: page gone missing, cache is useless
539		 */
540		if (fi->rdc.version == ff->readdir.version)
541			fuse_rdc_reset(inode);
542		goto retry_locked;
543	}
544
545	/* Make sure it's still the same version after getting the page. */
546	if (ff->readdir.version != fi->rdc.version) {
547		spin_unlock(&fi->rdc.lock);
548		unlock_page(page);
549		put_page(page);
550		goto retry;
551	}
552	spin_unlock(&fi->rdc.lock);
553
554	/*
555	 * Contents of the page are now protected against changing by holding
556	 * the page lock.
557	 */
558	addr = kmap_local_page(page);
559	res = fuse_parse_cache(ff, addr, size, ctx);
560	kunmap_local(addr);
561	unlock_page(page);
562	put_page(page);
563
564	if (res == FOUND_ERR)
565		return -EIO;
566
567	if (res == FOUND_ALL)
568		return 0;
569
570	if (size == PAGE_SIZE) {
571		/* We hit end of page: skip to next page. */
572		ff->readdir.cache_off = ALIGN(ff->readdir.cache_off, PAGE_SIZE);
573		goto retry;
574	}
575
576	/*
577	 * End of cache reached.  If found position, then we are done, otherwise
578	 * need to fall back to uncached, since the position we were looking for
579	 * wasn't in the cache.
580	 */
581	return res == FOUND_SOME ? 0 : UNCACHED;
582}
583
584int fuse_readdir(struct file *file, struct dir_context *ctx)
585{
586	struct fuse_file *ff = file->private_data;
587	struct inode *inode = file_inode(file);
588	int err;
589
590	if (fuse_is_bad(inode))
591		return -EIO;
592
593	mutex_lock(&ff->readdir.lock);
594
595	err = UNCACHED;
596	if (ff->open_flags & FOPEN_CACHE_DIR)
597		err = fuse_readdir_cached(file, ctx);
598	if (err == UNCACHED)
599		err = fuse_readdir_uncached(file, ctx);
600
601	mutex_unlock(&ff->readdir.lock);
602
603	return err;
604}
605