// SPDX-License-Identifier: GPL-2.0-or-later
/*
 * Copyright (C) 2022, Alibaba Cloud
 * Copyright (C) 2022, Bytedance Inc. All rights reserved.
 */
#include <linux/pseudo_fs.h>
#include <linux/fscache.h>
#include "internal.h"

static DEFINE_MUTEX(erofs_domain_list_lock);
static DEFINE_MUTEX(erofs_domain_cookies_lock);
static LIST_HEAD(erofs_domain_list);
static LIST_HEAD(erofs_domain_cookies_list);
static struct vfsmount *erofs_pseudo_mnt;

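/*
 * Pseudo filesystem for anonymous inodes: blobs shared within a domain are
 * bound to anonymous inodes allocated from this global pseudo mount, so that
 * they stay accessible across erofs instances.
 */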
static int erofs_anon_init_fs_context(struct fs_context *fc)
{
	return init_pseudo(fc, EROFS_SUPER_MAGIC) ? 0 : -ENOMEM;
}

static struct file_system_type erofs_anon_fs_type = {
	.owner		= THIS_MODULE,
	.name           = "pseudo_erofs",
	.init_fs_context = erofs_anon_init_fs_context,
	.kill_sb        = kill_anon_super,
};

struct erofs_fscache_request {
	struct erofs_fscache_request *primary;
	struct netfs_cache_resources cache_resources;
	struct address_space	*mapping;	/* The mapping being accessed */
	loff_t			start;		/* Start position */
	size_t			len;		/* Length of the request */
	size_t			submitted;	/* Length submitted so far */
	short			error;		/* 0 or error that occurred */
	refcount_t		ref;
};

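/* Allocate a request covering [start, start + len) of @mapping. */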
static struct erofs_fscache_request *erofs_fscache_req_alloc(struct address_space *mapping,
					     loff_t start, size_t len)
{
	struct erofs_fscache_request *req;

	req = kzalloc(sizeof(struct erofs_fscache_request), GFP_KERNEL);
	if (!req)
		return ERR_PTR(-ENOMEM);

	req->mapping = mapping;
	req->start   = start;
	req->len     = len;
	refcount_set(&req->ref, 1);

	return req;
}

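/*
 * Get a request for the next @len bytes: reuse @primary for the first
 * submission, otherwise chain a new request to it.  A chained request holds
 * a reference on the primary so that the primary completes last.
 */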
static struct erofs_fscache_request *erofs_fscache_req_chain(struct erofs_fscache_request *primary,
					     size_t len)
{
	struct erofs_fscache_request *req;

	/* use primary request for the first submission */
	if (!primary->submitted) {
		refcount_inc(&primary->ref);
		return primary;
	}

	req = erofs_fscache_req_alloc(primary->mapping,
			primary->start + primary->submitted, len);
	if (!IS_ERR(req)) {
		req->primary = primary;
		refcount_inc(&primary->ref);
	}
	return req;
}

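/*
 * Mark all folios covered by the completed request uptodate (unless an error
 * occurred) and unlock them, finishing the read.
 */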
static void erofs_fscache_req_complete(struct erofs_fscache_request *req)
{
	struct folio *folio;
	bool failed = req->error;
	pgoff_t start_page = req->start / PAGE_SIZE;
	pgoff_t last_page = ((req->start + req->len) / PAGE_SIZE) - 1;

	XA_STATE(xas, &req->mapping->i_pages, start_page);

	rcu_read_lock();
	xas_for_each(&xas, folio, last_page) {
		if (xas_retry(&xas, folio))
			continue;
		if (!failed)
			folio_mark_uptodate(folio);
		folio_unlock(folio);
	}
	rcu_read_unlock();
}

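/*
 * Drop a reference; on the final put, end the cache operation, complete the
 * primary request (or drop our reference on it if this request is chained),
 * and free the request.
 */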
static void erofs_fscache_req_put(struct erofs_fscache_request *req)
{
	if (refcount_dec_and_test(&req->ref)) {
		if (req->cache_resources.ops)
			req->cache_resources.ops->end_operation(&req->cache_resources);
		if (!req->primary)
			erofs_fscache_req_complete(req);
		else
			erofs_fscache_req_put(req->primary);
		kfree(req);
	}
}

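/* Completion callback for a single fscache_read() subrequest. */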
static void erofs_fscache_subreq_complete(void *priv,
		ssize_t transferred_or_error, bool was_async)
{
	struct erofs_fscache_request *req = priv;

	if (IS_ERR_VALUE(transferred_or_error)) {
		if (req->primary)
			req->primary->error = transferred_or_error;
		else
			req->error = transferred_or_error;
	}
	erofs_fscache_req_put(req);
}

/*
 * Read data from fscache (cookie, pstart, len), and fill the read data into
 * page cache described by (req->mapping, lstart, len). @pstart describes the
 * start physical address in the cache file.
 */
static int erofs_fscache_read_folios_async(struct fscache_cookie *cookie,
		struct erofs_fscache_request *req, loff_t pstart, size_t len)
{
	enum netfs_io_source source;
	struct super_block *sb = req->mapping->host->i_sb;
	struct netfs_cache_resources *cres = &req->cache_resources;
	struct iov_iter iter;
	loff_t lstart = req->start + req->submitted;
	size_t done = 0;
	int ret;

	DBG_BUGON(len > req->len - req->submitted);

	ret = fscache_begin_read_operation(cres, cookie);
	if (ret)
		return ret;

	while (done < len) {
		loff_t sstart = pstart + done;
		size_t slen = len - done;
		unsigned long flags = 1 << NETFS_SREQ_ONDEMAND;

		source = cres->ops->prepare_ondemand_read(cres,
				sstart, &slen, LLONG_MAX, &flags, 0);
		if (WARN_ON(slen == 0))
			source = NETFS_INVALID_READ;
		if (source != NETFS_READ_FROM_CACHE) {
			erofs_err(sb, "failed to fscache prepare_read (source %d)", source);
			return -EIO;
		}

		refcount_inc(&req->ref);
		iov_iter_xarray(&iter, ITER_DEST, &req->mapping->i_pages,
				lstart + done, slen);

		ret = fscache_read(cres, sstart, &iter, NETFS_READ_HOLE_FAIL,
				   erofs_fscache_subreq_complete, req);
		if (ret == -EIOCBQUEUED)
			ret = 0;
		if (ret) {
			erofs_err(sb, "failed to fscache_read (ret %d)", ret);
			return ret;
		}

		done += slen;
	}
	DBG_BUGON(done != len);
	return 0;
}

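/* Read a metadata folio directly from the backing blob. */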
static int erofs_fscache_meta_read_folio(struct file *data, struct folio *folio)
{
	int ret;
	struct erofs_fscache *ctx = folio_mapping(folio)->host->i_private;
	struct erofs_fscache_request *req;

	req = erofs_fscache_req_alloc(folio_mapping(folio),
				folio_pos(folio), folio_size(folio));
	if (IS_ERR(req)) {
		folio_unlock(folio);
		return PTR_ERR(req);
	}

	ret = erofs_fscache_read_folios_async(ctx->cookie, req,
				folio_pos(folio), folio_size(folio));
	if (ret)
		req->error = ret;

	erofs_fscache_req_put(req);
	return ret;
}

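/*
 * Read one mapping slice starting at the current submission offset: inline
 * (tail-packed) metadata is copied out of the metabuf, unmapped extents are
 * zeroed, and mapped extents are read from the cache file of the
 * corresponding device.
 */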
static int erofs_fscache_data_read_slice(struct erofs_fscache_request *primary)
{
	struct address_space *mapping = primary->mapping;
	struct inode *inode = mapping->host;
	struct super_block *sb = inode->i_sb;
	struct erofs_fscache_request *req;
	struct erofs_map_blocks map;
	struct erofs_map_dev mdev;
	struct iov_iter iter;
	loff_t pos = primary->start + primary->submitted;
	size_t count;
	int ret;

	map.m_la = pos;
	ret = erofs_map_blocks(inode, &map);
	if (ret)
		return ret;

	if (map.m_flags & EROFS_MAP_META) {
		struct erofs_buf buf = __EROFS_BUF_INITIALIZER;
		erofs_blk_t blknr;
		size_t offset, size;
		void *src;

		/* For tail packing layout, the offset may be non-zero. */
		offset = erofs_blkoff(sb, map.m_pa);
		blknr = erofs_blknr(sb, map.m_pa);
		size = map.m_llen;

		src = erofs_read_metabuf(&buf, sb, blknr, EROFS_KMAP);
		if (IS_ERR(src))
			return PTR_ERR(src);

		iov_iter_xarray(&iter, ITER_DEST, &mapping->i_pages, pos, PAGE_SIZE);
		if (copy_to_iter(src + offset, size, &iter) != size) {
			erofs_put_metabuf(&buf);
			return -EFAULT;
		}
		iov_iter_zero(PAGE_SIZE - size, &iter);
		erofs_put_metabuf(&buf);
		primary->submitted += PAGE_SIZE;
		return 0;
	}

	count = primary->len - primary->submitted;
	if (!(map.m_flags & EROFS_MAP_MAPPED)) {
		iov_iter_xarray(&iter, ITER_DEST, &mapping->i_pages, pos, count);
		iov_iter_zero(count, &iter);
		primary->submitted += count;
		return 0;
	}

	count = min_t(size_t, map.m_llen - (pos - map.m_la), count);
	DBG_BUGON(!count || count % PAGE_SIZE);

	mdev = (struct erofs_map_dev) {
		.m_deviceid = map.m_deviceid,
		.m_pa = map.m_pa,
	};
	ret = erofs_map_dev(sb, &mdev);
	if (ret)
		return ret;

	req = erofs_fscache_req_chain(primary, count);
	if (IS_ERR(req))
		return PTR_ERR(req);

	ret = erofs_fscache_read_folios_async(mdev.m_fscache->cookie,
			req, mdev.m_pa + (pos - map.m_la), count);
	erofs_fscache_req_put(req);
	primary->submitted += count;
	return ret;
}

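/* Read slices repeatedly until the whole request has been submitted. */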
static int erofs_fscache_data_read(struct erofs_fscache_request *req)
{
	int ret;

	do {
		ret = erofs_fscache_data_read_slice(req);
		if (ret)
			req->error = ret;
	} while (!ret && req->submitted < req->len);

	return ret;
}

static int erofs_fscache_read_folio(struct file *file, struct folio *folio)
{
	struct erofs_fscache_request *req;
	int ret;

	req = erofs_fscache_req_alloc(folio_mapping(folio),
			folio_pos(folio), folio_size(folio));
	if (IS_ERR(req)) {
		folio_unlock(folio);
		return PTR_ERR(req);
	}

	ret = erofs_fscache_data_read(req);
	erofs_fscache_req_put(req);
	return ret;
}

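/*
 * Submit a single request covering the whole readahead window; the folios
 * are unlocked on request completion.
 */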
static void erofs_fscache_readahead(struct readahead_control *rac)
{
	struct erofs_fscache_request *req;

	if (!readahead_count(rac))
		return;

	req = erofs_fscache_req_alloc(rac->mapping,
			readahead_pos(rac), readahead_length(rac));
	if (IS_ERR(req))
		return;

	/* The request completion will drop refs on the folios. */
	while (readahead_folio(rac))
		;

	erofs_fscache_data_read(req);
	erofs_fscache_req_put(req);
}

static const struct address_space_operations erofs_fscache_meta_aops = {
	.read_folio = erofs_fscache_meta_read_folio,
};

const struct address_space_operations erofs_fscache_access_aops = {
	.read_folio = erofs_fscache_read_folio,
	.readahead = erofs_fscache_readahead,
};

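/*
 * Drop a domain reference; the last put removes the domain from the global
 * list, relinquishes its volume, and unmounts the pseudo mount once no
 * domains remain.
 */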
static void erofs_fscache_domain_put(struct erofs_domain *domain)
{
	mutex_lock(&erofs_domain_list_lock);
	if (refcount_dec_and_test(&domain->ref)) {
		list_del(&domain->list);
		if (list_empty(&erofs_domain_list)) {
			kern_unmount(erofs_pseudo_mnt);
			erofs_pseudo_mnt = NULL;
		}
		fscache_relinquish_volume(domain->volume, NULL, false);
		mutex_unlock(&erofs_domain_list_lock);
		kfree(domain->domain_id);
		kfree(domain);
		return;
	}
	mutex_unlock(&erofs_domain_list_lock);
}

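/*
 * Acquire an fscache volume named "erofs,<domain_id>" (or "erofs,<fsid>"
 * when no domain is configured) for this filesystem instance.
 */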
static int erofs_fscache_register_volume(struct super_block *sb)
{
	struct erofs_sb_info *sbi = EROFS_SB(sb);
	char *domain_id = sbi->domain_id;
	struct fscache_volume *volume;
	char *name;
	int ret = 0;

	name = kasprintf(GFP_KERNEL, "erofs,%s",
			 domain_id ? domain_id : sbi->fsid);
	if (!name)
		return -ENOMEM;

	volume = fscache_acquire_volume(name, NULL, NULL, 0);
	if (IS_ERR_OR_NULL(volume)) {
		erofs_err(sb, "failed to register volume for %s", name);
		ret = volume ? PTR_ERR(volume) : -EOPNOTSUPP;
		volume = NULL;
	}

	sbi->volume = volume;
	kfree(name);
	return ret;
}

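/*
 * Create a new domain: register the backing volume, set up the global
 * pseudo mount on first use, and add the domain to the global list.
 * Called with erofs_domain_list_lock held.
 */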
static int erofs_fscache_init_domain(struct super_block *sb)
{
	int err;
	struct erofs_domain *domain;
	struct erofs_sb_info *sbi = EROFS_SB(sb);

	domain = kzalloc(sizeof(struct erofs_domain), GFP_KERNEL);
	if (!domain)
		return -ENOMEM;

	domain->domain_id = kstrdup(sbi->domain_id, GFP_KERNEL);
	if (!domain->domain_id) {
		kfree(domain);
		return -ENOMEM;
	}

	err = erofs_fscache_register_volume(sb);
	if (err)
		goto out;

	if (!erofs_pseudo_mnt) {
		struct vfsmount *mnt = kern_mount(&erofs_anon_fs_type);
		if (IS_ERR(mnt)) {
			err = PTR_ERR(mnt);
			goto out;
		}
		erofs_pseudo_mnt = mnt;
	}

	domain->volume = sbi->volume;
	refcount_set(&domain->ref, 1);
	list_add(&domain->list, &erofs_domain_list);
	sbi->domain = domain;
	return 0;
out:
	kfree(domain->domain_id);
	kfree(domain);
	return err;
}

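/*
 * Look up an existing domain by domain_id and take a reference on it, or
 * create a new domain if none matches.
 */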
static int erofs_fscache_register_domain(struct super_block *sb)
{
	int err;
	struct erofs_domain *domain;
	struct erofs_sb_info *sbi = EROFS_SB(sb);

	mutex_lock(&erofs_domain_list_lock);
	list_for_each_entry(domain, &erofs_domain_list, list) {
		if (!strcmp(domain->domain_id, sbi->domain_id)) {
			sbi->domain = domain;
			sbi->volume = domain->volume;
			refcount_inc(&domain->ref);
			mutex_unlock(&erofs_domain_list_lock);
			return 0;
		}
	}
	err = erofs_fscache_init_domain(sb);
	mutex_unlock(&erofs_domain_list_lock);
	return err;
}

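/*
 * Acquire an fscache cookie for the blob @name and set up an anonymous inode
 * whose mapping is used for reading the blob.  Shareable blobs take their
 * inode from the global pseudo mount so that they remain accessible across
 * erofs instances.
 */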
static struct erofs_fscache *erofs_fscache_acquire_cookie(struct super_block *sb,
						char *name, unsigned int flags)
{
	struct fscache_volume *volume = EROFS_SB(sb)->volume;
	struct erofs_fscache *ctx;
	struct fscache_cookie *cookie;
	struct super_block *isb;
	struct inode *inode;
	int ret;

	ctx = kzalloc(sizeof(*ctx), GFP_KERNEL);
	if (!ctx)
		return ERR_PTR(-ENOMEM);
	INIT_LIST_HEAD(&ctx->node);
	refcount_set(&ctx->ref, 1);

	cookie = fscache_acquire_cookie(volume, FSCACHE_ADV_WANT_CACHE_SIZE,
					name, strlen(name), NULL, 0, 0);
	if (!cookie) {
		erofs_err(sb, "failed to get cookie for %s", name);
		ret = -EINVAL;
		goto err;
	}
	fscache_use_cookie(cookie, false);

	/*
	 * Allocate anonymous inode in global pseudo mount for shareable blobs,
	 * so that they are accessible among erofs fs instances.
	 */
	isb = flags & EROFS_REG_COOKIE_SHARE ? erofs_pseudo_mnt->mnt_sb : sb;
	inode = new_inode(isb);
	if (!inode) {
		erofs_err(sb, "failed to get anon inode for %s", name);
		ret = -ENOMEM;
		goto err_cookie;
	}

	inode->i_size = OFFSET_MAX;
	inode->i_mapping->a_ops = &erofs_fscache_meta_aops;
	mapping_set_gfp_mask(inode->i_mapping, GFP_NOFS);
	inode->i_blkbits = EROFS_SB(sb)->blkszbits;
	inode->i_private = ctx;

	ctx->cookie = cookie;
	ctx->inode = inode;
	return ctx;

err_cookie:
	fscache_unuse_cookie(cookie, NULL, NULL);
	fscache_relinquish_cookie(cookie, false);
err:
	kfree(ctx);
	return ERR_PTR(ret);
}

static void erofs_fscache_relinquish_cookie(struct erofs_fscache *ctx)
{
	fscache_unuse_cookie(ctx->cookie, NULL, NULL);
	fscache_relinquish_cookie(ctx->cookie, false);
	iput(ctx->inode);
	kfree(ctx->name);
	kfree(ctx);
}

static struct erofs_fscache *erofs_domain_init_cookie(struct super_block *sb,
						char *name, unsigned int flags)
{
	struct erofs_fscache *ctx;
	struct erofs_domain *domain = EROFS_SB(sb)->domain;

	ctx = erofs_fscache_acquire_cookie(sb, name, flags);
	if (IS_ERR(ctx))
		return ctx;

	ctx->name = kstrdup(name, GFP_KERNEL);
	if (!ctx->name) {
		erofs_fscache_relinquish_cookie(ctx);
		return ERR_PTR(-ENOMEM);
	}

	refcount_inc(&domain->ref);
	ctx->domain = domain;
	list_add(&ctx->node, &erofs_domain_cookies_list);
	return ctx;
}

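/*
 * Register a cookie within a shared domain: reuse a matching cookie in the
 * domain if one exists (unless NEED_NOEXIST demands uniqueness), otherwise
 * create a fresh domain cookie.
 */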
static struct erofs_fscache *erofs_domain_register_cookie(struct super_block *sb,
						char *name, unsigned int flags)
{
	struct erofs_fscache *ctx;
	struct erofs_domain *domain = EROFS_SB(sb)->domain;

	flags |= EROFS_REG_COOKIE_SHARE;
	mutex_lock(&erofs_domain_cookies_lock);
	list_for_each_entry(ctx, &erofs_domain_cookies_list, node) {
		if (ctx->domain != domain || strcmp(ctx->name, name))
			continue;
		if (!(flags & EROFS_REG_COOKIE_NEED_NOEXIST)) {
			refcount_inc(&ctx->ref);
		} else {
			erofs_err(sb, "%s already exists in domain %s", name,
				  domain->domain_id);
			ctx = ERR_PTR(-EEXIST);
		}
		mutex_unlock(&erofs_domain_cookies_lock);
		return ctx;
	}
	ctx = erofs_domain_init_cookie(sb, name, flags);
	mutex_unlock(&erofs_domain_cookies_lock);
	return ctx;
}

struct erofs_fscache *erofs_fscache_register_cookie(struct super_block *sb,
						    char *name,
						    unsigned int flags)
{
	if (EROFS_SB(sb)->domain_id)
		return erofs_domain_register_cookie(sb, name, flags);
	return erofs_fscache_acquire_cookie(sb, name, flags);
}

void erofs_fscache_unregister_cookie(struct erofs_fscache *ctx)
{
	struct erofs_domain *domain = NULL;

	if (!ctx)
		return;
	if (!ctx->domain)
		return erofs_fscache_relinquish_cookie(ctx);

	mutex_lock(&erofs_domain_cookies_lock);
	if (refcount_dec_and_test(&ctx->ref)) {
		domain = ctx->domain;
		list_del(&ctx->node);
		erofs_fscache_relinquish_cookie(ctx);
	}
	mutex_unlock(&erofs_domain_cookies_lock);
	if (domain)
		erofs_fscache_domain_put(domain);
}

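/*
 * Register the fscache context of a filesystem instance: set up the domain
 * or volume, then acquire the cookie of the primary data blob (named by
 * fsid).
 */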
int erofs_fscache_register_fs(struct super_block *sb)
{
	int ret;
	struct erofs_sb_info *sbi = EROFS_SB(sb);
	struct erofs_fscache *fscache;
	unsigned int flags = 0;

	if (sbi->domain_id)
		ret = erofs_fscache_register_domain(sb);
	else
		ret = erofs_fscache_register_volume(sb);
	if (ret)
		return ret;

	/*
	 * When a shared domain is enabled, use NEED_NOEXIST to guarantee
	 * that the primary data blob (aka fsid) is unique in the shared
	 * domain.
	 *
	 * For the non-shared-domain case, fscache_acquire_volume() invoked
	 * by erofs_fscache_register_volume() has already guaranteed the
	 * uniqueness of the primary data blob.
	 *
	 * Acquired domain/volume will be relinquished in kill_sb() on error.
	 */
	if (sbi->domain_id)
		flags |= EROFS_REG_COOKIE_NEED_NOEXIST;
	fscache = erofs_fscache_register_cookie(sb, sbi->fsid, flags);
	if (IS_ERR(fscache))
		return PTR_ERR(fscache);

	sbi->s_fscache = fscache;
	return 0;
}

void erofs_fscache_unregister_fs(struct super_block *sb)
{
	struct erofs_sb_info *sbi = EROFS_SB(sb);

	erofs_fscache_unregister_cookie(sbi->s_fscache);

	if (sbi->domain)
		erofs_fscache_domain_put(sbi->domain);
	else
		fscache_relinquish_volume(sbi->volume, NULL, false);

	sbi->s_fscache = NULL;
	sbi->volume = NULL;
	sbi->domain = NULL;
}