18c2ecf20Sopenharmony_ci// SPDX-License-Identifier: GPL-2.0-only
28c2ecf20Sopenharmony_ci/*
38c2ecf20Sopenharmony_ci * Copyright (C) 2018 HUAWEI, Inc.
48c2ecf20Sopenharmony_ci *             https://www.huawei.com/
58c2ecf20Sopenharmony_ci * Created by Gao Xiang <gaoxiang25@huawei.com>
68c2ecf20Sopenharmony_ci */
78c2ecf20Sopenharmony_ci#include "zdata.h"
88c2ecf20Sopenharmony_ci#include "compress.h"
98c2ecf20Sopenharmony_ci#include <linux/prefetch.h>
108c2ecf20Sopenharmony_ci
118c2ecf20Sopenharmony_ci#include <trace/events/erofs.h>
128c2ecf20Sopenharmony_ci
/*
 * Placeholder value for compressed_pages[] slots: a slot holding it is
 * non-NULL, so it will not be filled with a file page for in-place
 * decompression.
 */
#define PAGE_UNALLOCATED	((void *)0x5F0E4B1D)
188c2ecf20Sopenharmony_ci
/* how cached pages are allocated for a pcluster */
enum z_erofs_cache_alloctype {
	/* do not allocate any cached pages */
	DONTALLOC = 0,
	/* delayed allocation (performed at the time of submitting I/O) */
	DELAYEDALLOC,
};
248c2ecf20Sopenharmony_ci
/*
 * tagged pointer with 1-bit tag for all compressed pages
 * tag 1 - the page is just found with an extra page reference
 *         (this is the tag value folded in by tag_compressed_page_justfound())
 */
298c2ecf20Sopenharmony_citypedef tagptr1_t compressed_page_t;
308c2ecf20Sopenharmony_ci
318c2ecf20Sopenharmony_ci#define tag_compressed_page_justfound(page) \
328c2ecf20Sopenharmony_ci	tagptr_fold(compressed_page_t, page, 1)
338c2ecf20Sopenharmony_ci
348c2ecf20Sopenharmony_cistatic struct workqueue_struct *z_erofs_workqueue __read_mostly;
358c2ecf20Sopenharmony_cistatic struct kmem_cache *pcluster_cachep __read_mostly;
368c2ecf20Sopenharmony_ci
378c2ecf20Sopenharmony_civoid z_erofs_exit_zip_subsystem(void)
388c2ecf20Sopenharmony_ci{
398c2ecf20Sopenharmony_ci	destroy_workqueue(z_erofs_workqueue);
408c2ecf20Sopenharmony_ci	kmem_cache_destroy(pcluster_cachep);
418c2ecf20Sopenharmony_ci}
428c2ecf20Sopenharmony_ci
438c2ecf20Sopenharmony_cistatic inline int z_erofs_init_workqueue(void)
448c2ecf20Sopenharmony_ci{
458c2ecf20Sopenharmony_ci	const unsigned int onlinecpus = num_possible_cpus();
468c2ecf20Sopenharmony_ci
478c2ecf20Sopenharmony_ci	/*
488c2ecf20Sopenharmony_ci	 * no need to spawn too many threads, limiting threads could minimum
498c2ecf20Sopenharmony_ci	 * scheduling overhead, perhaps per-CPU threads should be better?
508c2ecf20Sopenharmony_ci	 */
518c2ecf20Sopenharmony_ci	z_erofs_workqueue = alloc_workqueue("erofs_unzipd",
528c2ecf20Sopenharmony_ci					    WQ_UNBOUND | WQ_HIGHPRI,
538c2ecf20Sopenharmony_ci					    onlinecpus + onlinecpus / 4);
548c2ecf20Sopenharmony_ci	return z_erofs_workqueue ? 0 : -ENOMEM;
558c2ecf20Sopenharmony_ci}
568c2ecf20Sopenharmony_ci
578c2ecf20Sopenharmony_cistatic void z_erofs_pcluster_init_once(void *ptr)
588c2ecf20Sopenharmony_ci{
598c2ecf20Sopenharmony_ci	struct z_erofs_pcluster *pcl = ptr;
608c2ecf20Sopenharmony_ci	struct z_erofs_collection *cl = z_erofs_primarycollection(pcl);
618c2ecf20Sopenharmony_ci	unsigned int i;
628c2ecf20Sopenharmony_ci
638c2ecf20Sopenharmony_ci	mutex_init(&cl->lock);
648c2ecf20Sopenharmony_ci	cl->nr_pages = 0;
658c2ecf20Sopenharmony_ci	cl->vcnt = 0;
668c2ecf20Sopenharmony_ci	for (i = 0; i < Z_EROFS_CLUSTER_MAX_PAGES; ++i)
678c2ecf20Sopenharmony_ci		pcl->compressed_pages[i] = NULL;
688c2ecf20Sopenharmony_ci}
698c2ecf20Sopenharmony_ci
708c2ecf20Sopenharmony_ciint __init z_erofs_init_zip_subsystem(void)
718c2ecf20Sopenharmony_ci{
728c2ecf20Sopenharmony_ci	pcluster_cachep = kmem_cache_create("erofs_compress",
738c2ecf20Sopenharmony_ci					    Z_EROFS_WORKGROUP_SIZE, 0,
748c2ecf20Sopenharmony_ci					    SLAB_RECLAIM_ACCOUNT,
758c2ecf20Sopenharmony_ci					    z_erofs_pcluster_init_once);
768c2ecf20Sopenharmony_ci	if (pcluster_cachep) {
778c2ecf20Sopenharmony_ci		if (!z_erofs_init_workqueue())
788c2ecf20Sopenharmony_ci			return 0;
798c2ecf20Sopenharmony_ci
808c2ecf20Sopenharmony_ci		kmem_cache_destroy(pcluster_cachep);
818c2ecf20Sopenharmony_ci	}
828c2ecf20Sopenharmony_ci	return -ENOMEM;
838c2ecf20Sopenharmony_ci}
848c2ecf20Sopenharmony_ci
/*
 * Role of the current collection with respect to the caller's chain.
 *
 * The numeric order matters: this file compares modes with relational
 * operators (<, <=, >=), so the enumerators must not be reordered.
 */
enum z_erofs_collectmode {
	COLLECT_SECONDARY = 0,
	COLLECT_PRIMARY,
	/*
	 * The current collection was the tail of an existing chain, and
	 * all previously processed chained collections are decided to be
	 * hooked up to it.
	 * A new chain will be created for the remaining collections which
	 * are not processed yet; therefore, unlike COLLECT_PRIMARY_FOLLOWED,
	 * the next collection cannot reuse the whole page safely in the
	 * following scenario:
	 *  ________________________________________________________________
	 * |      tail (partial) page     |       head (partial) page       |
	 * |   (belongs to the next cl)   |   (belongs to the current cl)   |
	 * |_______PRIMARY_FOLLOWED_______|________PRIMARY_HOOKED___________|
	 */
	COLLECT_PRIMARY_HOOKED,
	COLLECT_PRIMARY_FOLLOWED_NOINPLACE,
	/*
	 * The current collection has been linked with the owned chain and
	 * could also be linked with the remaining collections.  So if the
	 * page being processed is the tail page of the collection, the
	 * current collection can safely use the whole page for in-place
	 * I/O (since the previous collection is under control), as
	 * illustrated below:
	 *  ________________________________________________________________
	 * |  tail (partial) page |          head (partial) page           |
	 * |  (of the current cl) |      (of the previous collection)      |
	 * |  PRIMARY_FOLLOWED or |                                        |
	 * |_____PRIMARY_HOOKED___|____________PRIMARY_FOLLOWED____________|
	 *
	 * [  (*) the above page can be used as inplace I/O.               ]
	 */
	COLLECT_PRIMARY_FOLLOWED,
};
1208c2ecf20Sopenharmony_ci
1218c2ecf20Sopenharmony_cistruct z_erofs_collector {
1228c2ecf20Sopenharmony_ci	struct z_erofs_pagevec_ctor vector;
1238c2ecf20Sopenharmony_ci
1248c2ecf20Sopenharmony_ci	struct z_erofs_pcluster *pcl, *tailpcl;
1258c2ecf20Sopenharmony_ci	struct z_erofs_collection *cl;
1268c2ecf20Sopenharmony_ci	struct page **compressedpages;
1278c2ecf20Sopenharmony_ci	z_erofs_next_pcluster_t owned_head;
1288c2ecf20Sopenharmony_ci
1298c2ecf20Sopenharmony_ci	enum z_erofs_collectmode mode;
1308c2ecf20Sopenharmony_ci};
1318c2ecf20Sopenharmony_ci
1328c2ecf20Sopenharmony_cistruct z_erofs_decompress_frontend {
1338c2ecf20Sopenharmony_ci	struct inode *const inode;
1348c2ecf20Sopenharmony_ci
1358c2ecf20Sopenharmony_ci	struct z_erofs_collector clt;
1368c2ecf20Sopenharmony_ci	struct erofs_map_blocks map;
1378c2ecf20Sopenharmony_ci
1388c2ecf20Sopenharmony_ci	bool readahead;
1398c2ecf20Sopenharmony_ci	/* used for applying cache strategy on the fly */
1408c2ecf20Sopenharmony_ci	bool backmost;
1418c2ecf20Sopenharmony_ci	erofs_off_t headoffset;
1428c2ecf20Sopenharmony_ci};
1438c2ecf20Sopenharmony_ci
/* initial collector state: owned chain at its tail, PRIMARY_FOLLOWED mode */
#define COLLECTOR_INIT() {				\
	.owned_head = Z_EROFS_PCLUSTER_TAIL,		\
	.mode = COLLECT_PRIMARY_FOLLOWED,		\
}

/* initial frontend state for inode @__i; the first collection is backmost */
#define DECOMPRESS_FRONTEND_INIT(__i) {			\
	.inode = __i,					\
	.clt = COLLECTOR_INIT(),			\
	.backmost = true,				\
}
1518c2ecf20Sopenharmony_ci
1528c2ecf20Sopenharmony_cistatic struct page *z_pagemap_global[Z_EROFS_VMAP_GLOBAL_PAGES];
1538c2ecf20Sopenharmony_cistatic DEFINE_MUTEX(z_pagemap_global_lock);
1548c2ecf20Sopenharmony_ci
1558c2ecf20Sopenharmony_cistatic void preload_compressed_pages(struct z_erofs_collector *clt,
1568c2ecf20Sopenharmony_ci				     struct address_space *mc,
1578c2ecf20Sopenharmony_ci				     enum z_erofs_cache_alloctype type)
1588c2ecf20Sopenharmony_ci{
1598c2ecf20Sopenharmony_ci	const struct z_erofs_pcluster *pcl = clt->pcl;
1608c2ecf20Sopenharmony_ci	const unsigned int clusterpages = BIT(pcl->clusterbits);
1618c2ecf20Sopenharmony_ci	struct page **pages = clt->compressedpages;
1628c2ecf20Sopenharmony_ci	pgoff_t index = pcl->obj.index + (pages - pcl->compressed_pages);
1638c2ecf20Sopenharmony_ci	bool standalone = true;
1648c2ecf20Sopenharmony_ci
1658c2ecf20Sopenharmony_ci	if (clt->mode < COLLECT_PRIMARY_FOLLOWED)
1668c2ecf20Sopenharmony_ci		return;
1678c2ecf20Sopenharmony_ci
1688c2ecf20Sopenharmony_ci	for (; pages < pcl->compressed_pages + clusterpages; ++pages) {
1698c2ecf20Sopenharmony_ci		struct page *page;
1708c2ecf20Sopenharmony_ci		compressed_page_t t;
1718c2ecf20Sopenharmony_ci
1728c2ecf20Sopenharmony_ci		/* the compressed page was loaded before */
1738c2ecf20Sopenharmony_ci		if (READ_ONCE(*pages))
1748c2ecf20Sopenharmony_ci			continue;
1758c2ecf20Sopenharmony_ci
1768c2ecf20Sopenharmony_ci		page = find_get_page(mc, index);
1778c2ecf20Sopenharmony_ci
1788c2ecf20Sopenharmony_ci		if (page) {
1798c2ecf20Sopenharmony_ci			t = tag_compressed_page_justfound(page);
1808c2ecf20Sopenharmony_ci		} else if (type == DELAYEDALLOC) {
1818c2ecf20Sopenharmony_ci			t = tagptr_init(compressed_page_t, PAGE_UNALLOCATED);
1828c2ecf20Sopenharmony_ci		} else {	/* DONTALLOC */
1838c2ecf20Sopenharmony_ci			if (standalone)
1848c2ecf20Sopenharmony_ci				clt->compressedpages = pages;
1858c2ecf20Sopenharmony_ci			standalone = false;
1868c2ecf20Sopenharmony_ci			continue;
1878c2ecf20Sopenharmony_ci		}
1888c2ecf20Sopenharmony_ci
1898c2ecf20Sopenharmony_ci		if (!cmpxchg_relaxed(pages, NULL, tagptr_cast_ptr(t)))
1908c2ecf20Sopenharmony_ci			continue;
1918c2ecf20Sopenharmony_ci
1928c2ecf20Sopenharmony_ci		if (page)
1938c2ecf20Sopenharmony_ci			put_page(page);
1948c2ecf20Sopenharmony_ci	}
1958c2ecf20Sopenharmony_ci
1968c2ecf20Sopenharmony_ci	if (standalone)		/* downgrade to PRIMARY_FOLLOWED_NOINPLACE */
1978c2ecf20Sopenharmony_ci		clt->mode = COLLECT_PRIMARY_FOLLOWED_NOINPLACE;
1988c2ecf20Sopenharmony_ci}
1998c2ecf20Sopenharmony_ci
2008c2ecf20Sopenharmony_ci/* called by erofs_shrinker to get rid of all compressed_pages */
2018c2ecf20Sopenharmony_ciint erofs_try_to_free_all_cached_pages(struct erofs_sb_info *sbi,
2028c2ecf20Sopenharmony_ci				       struct erofs_workgroup *grp)
2038c2ecf20Sopenharmony_ci{
2048c2ecf20Sopenharmony_ci	struct z_erofs_pcluster *const pcl =
2058c2ecf20Sopenharmony_ci		container_of(grp, struct z_erofs_pcluster, obj);
2068c2ecf20Sopenharmony_ci	struct address_space *const mapping = MNGD_MAPPING(sbi);
2078c2ecf20Sopenharmony_ci	const unsigned int clusterpages = BIT(pcl->clusterbits);
2088c2ecf20Sopenharmony_ci	int i;
2098c2ecf20Sopenharmony_ci
2108c2ecf20Sopenharmony_ci	/*
2118c2ecf20Sopenharmony_ci	 * refcount of workgroup is now freezed as 1,
2128c2ecf20Sopenharmony_ci	 * therefore no need to worry about available decompression users.
2138c2ecf20Sopenharmony_ci	 */
2148c2ecf20Sopenharmony_ci	for (i = 0; i < clusterpages; ++i) {
2158c2ecf20Sopenharmony_ci		struct page *page = pcl->compressed_pages[i];
2168c2ecf20Sopenharmony_ci
2178c2ecf20Sopenharmony_ci		if (!page)
2188c2ecf20Sopenharmony_ci			continue;
2198c2ecf20Sopenharmony_ci
2208c2ecf20Sopenharmony_ci		/* block other users from reclaiming or migrating the page */
2218c2ecf20Sopenharmony_ci		if (!trylock_page(page))
2228c2ecf20Sopenharmony_ci			return -EBUSY;
2238c2ecf20Sopenharmony_ci
2248c2ecf20Sopenharmony_ci		if (page->mapping != mapping)
2258c2ecf20Sopenharmony_ci			continue;
2268c2ecf20Sopenharmony_ci
2278c2ecf20Sopenharmony_ci		/* barrier is implied in the following 'unlock_page' */
2288c2ecf20Sopenharmony_ci		WRITE_ONCE(pcl->compressed_pages[i], NULL);
2298c2ecf20Sopenharmony_ci		set_page_private(page, 0);
2308c2ecf20Sopenharmony_ci		ClearPagePrivate(page);
2318c2ecf20Sopenharmony_ci
2328c2ecf20Sopenharmony_ci		unlock_page(page);
2338c2ecf20Sopenharmony_ci		put_page(page);
2348c2ecf20Sopenharmony_ci	}
2358c2ecf20Sopenharmony_ci	return 0;
2368c2ecf20Sopenharmony_ci}
2378c2ecf20Sopenharmony_ci
2388c2ecf20Sopenharmony_ciint erofs_try_to_free_cached_page(struct address_space *mapping,
2398c2ecf20Sopenharmony_ci				  struct page *page)
2408c2ecf20Sopenharmony_ci{
2418c2ecf20Sopenharmony_ci	struct z_erofs_pcluster *const pcl = (void *)page_private(page);
2428c2ecf20Sopenharmony_ci	const unsigned int clusterpages = BIT(pcl->clusterbits);
2438c2ecf20Sopenharmony_ci	int ret = 0;	/* 0 - busy */
2448c2ecf20Sopenharmony_ci
2458c2ecf20Sopenharmony_ci	if (erofs_workgroup_try_to_freeze(&pcl->obj, 1)) {
2468c2ecf20Sopenharmony_ci		unsigned int i;
2478c2ecf20Sopenharmony_ci
2488c2ecf20Sopenharmony_ci		for (i = 0; i < clusterpages; ++i) {
2498c2ecf20Sopenharmony_ci			if (pcl->compressed_pages[i] == page) {
2508c2ecf20Sopenharmony_ci				WRITE_ONCE(pcl->compressed_pages[i], NULL);
2518c2ecf20Sopenharmony_ci				ret = 1;
2528c2ecf20Sopenharmony_ci				break;
2538c2ecf20Sopenharmony_ci			}
2548c2ecf20Sopenharmony_ci		}
2558c2ecf20Sopenharmony_ci		erofs_workgroup_unfreeze(&pcl->obj, 1);
2568c2ecf20Sopenharmony_ci
2578c2ecf20Sopenharmony_ci		if (ret) {
2588c2ecf20Sopenharmony_ci			ClearPagePrivate(page);
2598c2ecf20Sopenharmony_ci			put_page(page);
2608c2ecf20Sopenharmony_ci		}
2618c2ecf20Sopenharmony_ci	}
2628c2ecf20Sopenharmony_ci	return ret;
2638c2ecf20Sopenharmony_ci}
2648c2ecf20Sopenharmony_ci
2658c2ecf20Sopenharmony_ci/* page_type must be Z_EROFS_PAGE_TYPE_EXCLUSIVE */
2668c2ecf20Sopenharmony_cistatic inline bool z_erofs_try_inplace_io(struct z_erofs_collector *clt,
2678c2ecf20Sopenharmony_ci					  struct page *page)
2688c2ecf20Sopenharmony_ci{
2698c2ecf20Sopenharmony_ci	struct z_erofs_pcluster *const pcl = clt->pcl;
2708c2ecf20Sopenharmony_ci	const unsigned int clusterpages = BIT(pcl->clusterbits);
2718c2ecf20Sopenharmony_ci
2728c2ecf20Sopenharmony_ci	while (clt->compressedpages < pcl->compressed_pages + clusterpages) {
2738c2ecf20Sopenharmony_ci		if (!cmpxchg(clt->compressedpages++, NULL, page))
2748c2ecf20Sopenharmony_ci			return true;
2758c2ecf20Sopenharmony_ci	}
2768c2ecf20Sopenharmony_ci	return false;
2778c2ecf20Sopenharmony_ci}
2788c2ecf20Sopenharmony_ci
2798c2ecf20Sopenharmony_ci/* callers must be with collection lock held */
2808c2ecf20Sopenharmony_cistatic int z_erofs_attach_page(struct z_erofs_collector *clt,
2818c2ecf20Sopenharmony_ci			       struct page *page, enum z_erofs_page_type type,
2828c2ecf20Sopenharmony_ci			       bool pvec_safereuse)
2838c2ecf20Sopenharmony_ci{
2848c2ecf20Sopenharmony_ci	int ret;
2858c2ecf20Sopenharmony_ci
2868c2ecf20Sopenharmony_ci	/* give priority for inplaceio */
2878c2ecf20Sopenharmony_ci	if (clt->mode >= COLLECT_PRIMARY &&
2888c2ecf20Sopenharmony_ci	    type == Z_EROFS_PAGE_TYPE_EXCLUSIVE &&
2898c2ecf20Sopenharmony_ci	    z_erofs_try_inplace_io(clt, page))
2908c2ecf20Sopenharmony_ci		return 0;
2918c2ecf20Sopenharmony_ci
2928c2ecf20Sopenharmony_ci	ret = z_erofs_pagevec_enqueue(&clt->vector, page, type,
2938c2ecf20Sopenharmony_ci				      pvec_safereuse);
2948c2ecf20Sopenharmony_ci	clt->cl->vcnt += (unsigned int)ret;
2958c2ecf20Sopenharmony_ci	return ret ? 0 : -EAGAIN;
2968c2ecf20Sopenharmony_ci}
2978c2ecf20Sopenharmony_ci
2988c2ecf20Sopenharmony_cistatic enum z_erofs_collectmode
2998c2ecf20Sopenharmony_citry_to_claim_pcluster(struct z_erofs_pcluster *pcl,
3008c2ecf20Sopenharmony_ci		      z_erofs_next_pcluster_t *owned_head)
3018c2ecf20Sopenharmony_ci{
3028c2ecf20Sopenharmony_ci	/* let's claim these following types of pclusters */
3038c2ecf20Sopenharmony_ciretry:
3048c2ecf20Sopenharmony_ci	if (pcl->next == Z_EROFS_PCLUSTER_NIL) {
3058c2ecf20Sopenharmony_ci		/* type 1, nil pcluster */
3068c2ecf20Sopenharmony_ci		if (cmpxchg(&pcl->next, Z_EROFS_PCLUSTER_NIL,
3078c2ecf20Sopenharmony_ci			    *owned_head) != Z_EROFS_PCLUSTER_NIL)
3088c2ecf20Sopenharmony_ci			goto retry;
3098c2ecf20Sopenharmony_ci
3108c2ecf20Sopenharmony_ci		*owned_head = &pcl->next;
3118c2ecf20Sopenharmony_ci		/* lucky, I am the followee :) */
3128c2ecf20Sopenharmony_ci		return COLLECT_PRIMARY_FOLLOWED;
3138c2ecf20Sopenharmony_ci	} else if (pcl->next == Z_EROFS_PCLUSTER_TAIL) {
3148c2ecf20Sopenharmony_ci		/*
3158c2ecf20Sopenharmony_ci		 * type 2, link to the end of a existing open chain,
3168c2ecf20Sopenharmony_ci		 * be careful that its submission itself is governed
3178c2ecf20Sopenharmony_ci		 * by the original owned chain.
3188c2ecf20Sopenharmony_ci		 */
3198c2ecf20Sopenharmony_ci		if (cmpxchg(&pcl->next, Z_EROFS_PCLUSTER_TAIL,
3208c2ecf20Sopenharmony_ci			    *owned_head) != Z_EROFS_PCLUSTER_TAIL)
3218c2ecf20Sopenharmony_ci			goto retry;
3228c2ecf20Sopenharmony_ci		*owned_head = Z_EROFS_PCLUSTER_TAIL;
3238c2ecf20Sopenharmony_ci		return COLLECT_PRIMARY_HOOKED;
3248c2ecf20Sopenharmony_ci	}
3258c2ecf20Sopenharmony_ci	return COLLECT_PRIMARY;	/* :( better luck next time */
3268c2ecf20Sopenharmony_ci}
3278c2ecf20Sopenharmony_ci
3288c2ecf20Sopenharmony_cistatic int z_erofs_lookup_collection(struct z_erofs_collector *clt,
3298c2ecf20Sopenharmony_ci				     struct inode *inode,
3308c2ecf20Sopenharmony_ci				     struct erofs_map_blocks *map)
3318c2ecf20Sopenharmony_ci{
3328c2ecf20Sopenharmony_ci	struct z_erofs_pcluster *pcl = clt->pcl;
3338c2ecf20Sopenharmony_ci	struct z_erofs_collection *cl;
3348c2ecf20Sopenharmony_ci	unsigned int length;
3358c2ecf20Sopenharmony_ci
3368c2ecf20Sopenharmony_ci	/* to avoid unexpected loop formed by corrupted images */
3378c2ecf20Sopenharmony_ci	if (clt->owned_head == &pcl->next || pcl == clt->tailpcl) {
3388c2ecf20Sopenharmony_ci		DBG_BUGON(1);
3398c2ecf20Sopenharmony_ci		return -EFSCORRUPTED;
3408c2ecf20Sopenharmony_ci	}
3418c2ecf20Sopenharmony_ci
3428c2ecf20Sopenharmony_ci	cl = z_erofs_primarycollection(pcl);
3438c2ecf20Sopenharmony_ci	if (cl->pageofs != (map->m_la & ~PAGE_MASK)) {
3448c2ecf20Sopenharmony_ci		DBG_BUGON(1);
3458c2ecf20Sopenharmony_ci		return -EFSCORRUPTED;
3468c2ecf20Sopenharmony_ci	}
3478c2ecf20Sopenharmony_ci
3488c2ecf20Sopenharmony_ci	length = READ_ONCE(pcl->length);
3498c2ecf20Sopenharmony_ci	if (length & Z_EROFS_PCLUSTER_FULL_LENGTH) {
3508c2ecf20Sopenharmony_ci		if ((map->m_llen << Z_EROFS_PCLUSTER_LENGTH_BIT) > length) {
3518c2ecf20Sopenharmony_ci			DBG_BUGON(1);
3528c2ecf20Sopenharmony_ci			return -EFSCORRUPTED;
3538c2ecf20Sopenharmony_ci		}
3548c2ecf20Sopenharmony_ci	} else {
3558c2ecf20Sopenharmony_ci		unsigned int llen = map->m_llen << Z_EROFS_PCLUSTER_LENGTH_BIT;
3568c2ecf20Sopenharmony_ci
3578c2ecf20Sopenharmony_ci		if (map->m_flags & EROFS_MAP_FULL_MAPPED)
3588c2ecf20Sopenharmony_ci			llen |= Z_EROFS_PCLUSTER_FULL_LENGTH;
3598c2ecf20Sopenharmony_ci
3608c2ecf20Sopenharmony_ci		while (llen > length &&
3618c2ecf20Sopenharmony_ci		       length != cmpxchg_relaxed(&pcl->length, length, llen)) {
3628c2ecf20Sopenharmony_ci			cpu_relax();
3638c2ecf20Sopenharmony_ci			length = READ_ONCE(pcl->length);
3648c2ecf20Sopenharmony_ci		}
3658c2ecf20Sopenharmony_ci	}
3668c2ecf20Sopenharmony_ci	mutex_lock(&cl->lock);
3678c2ecf20Sopenharmony_ci	/* used to check tail merging loop due to corrupted images */
3688c2ecf20Sopenharmony_ci	if (clt->owned_head == Z_EROFS_PCLUSTER_TAIL)
3698c2ecf20Sopenharmony_ci		clt->tailpcl = pcl;
3708c2ecf20Sopenharmony_ci	clt->mode = try_to_claim_pcluster(pcl, &clt->owned_head);
3718c2ecf20Sopenharmony_ci	/* clean tailpcl if the current owned_head is Z_EROFS_PCLUSTER_TAIL */
3728c2ecf20Sopenharmony_ci	if (clt->owned_head == Z_EROFS_PCLUSTER_TAIL)
3738c2ecf20Sopenharmony_ci		clt->tailpcl = NULL;
3748c2ecf20Sopenharmony_ci	clt->cl = cl;
3758c2ecf20Sopenharmony_ci	return 0;
3768c2ecf20Sopenharmony_ci}
3778c2ecf20Sopenharmony_ci
3788c2ecf20Sopenharmony_cistatic int z_erofs_register_collection(struct z_erofs_collector *clt,
3798c2ecf20Sopenharmony_ci				       struct inode *inode,
3808c2ecf20Sopenharmony_ci				       struct erofs_map_blocks *map)
3818c2ecf20Sopenharmony_ci{
3828c2ecf20Sopenharmony_ci	struct z_erofs_pcluster *pcl;
3838c2ecf20Sopenharmony_ci	struct z_erofs_collection *cl;
3848c2ecf20Sopenharmony_ci	struct erofs_workgroup *grp;
3858c2ecf20Sopenharmony_ci	int err;
3868c2ecf20Sopenharmony_ci
3878c2ecf20Sopenharmony_ci	/* no available workgroup, let's allocate one */
3888c2ecf20Sopenharmony_ci	pcl = kmem_cache_alloc(pcluster_cachep, GFP_NOFS);
3898c2ecf20Sopenharmony_ci	if (!pcl)
3908c2ecf20Sopenharmony_ci		return -ENOMEM;
3918c2ecf20Sopenharmony_ci
3928c2ecf20Sopenharmony_ci	atomic_set(&pcl->obj.refcount, 1);
3938c2ecf20Sopenharmony_ci	pcl->obj.index = map->m_pa >> PAGE_SHIFT;
3948c2ecf20Sopenharmony_ci
3958c2ecf20Sopenharmony_ci	pcl->length = (map->m_llen << Z_EROFS_PCLUSTER_LENGTH_BIT) |
3968c2ecf20Sopenharmony_ci		(map->m_flags & EROFS_MAP_FULL_MAPPED ?
3978c2ecf20Sopenharmony_ci			Z_EROFS_PCLUSTER_FULL_LENGTH : 0);
3988c2ecf20Sopenharmony_ci
3998c2ecf20Sopenharmony_ci	if (map->m_flags & EROFS_MAP_ZIPPED)
4008c2ecf20Sopenharmony_ci		pcl->algorithmformat = Z_EROFS_COMPRESSION_LZ4;
4018c2ecf20Sopenharmony_ci	else
4028c2ecf20Sopenharmony_ci		pcl->algorithmformat = Z_EROFS_COMPRESSION_SHIFTED;
4038c2ecf20Sopenharmony_ci
4048c2ecf20Sopenharmony_ci	pcl->clusterbits = EROFS_I(inode)->z_physical_clusterbits[0];
4058c2ecf20Sopenharmony_ci	pcl->clusterbits -= PAGE_SHIFT;
4068c2ecf20Sopenharmony_ci
4078c2ecf20Sopenharmony_ci	/* new pclusters should be claimed as type 1, primary and followed */
4088c2ecf20Sopenharmony_ci	pcl->next = clt->owned_head;
4098c2ecf20Sopenharmony_ci	clt->mode = COLLECT_PRIMARY_FOLLOWED;
4108c2ecf20Sopenharmony_ci
4118c2ecf20Sopenharmony_ci	cl = z_erofs_primarycollection(pcl);
4128c2ecf20Sopenharmony_ci
4138c2ecf20Sopenharmony_ci	/* must be cleaned before freeing to slab */
4148c2ecf20Sopenharmony_ci	DBG_BUGON(cl->nr_pages);
4158c2ecf20Sopenharmony_ci	DBG_BUGON(cl->vcnt);
4168c2ecf20Sopenharmony_ci
4178c2ecf20Sopenharmony_ci	cl->pageofs = map->m_la & ~PAGE_MASK;
4188c2ecf20Sopenharmony_ci
4198c2ecf20Sopenharmony_ci	/*
4208c2ecf20Sopenharmony_ci	 * lock all primary followed works before visible to others
4218c2ecf20Sopenharmony_ci	 * and mutex_trylock *never* fails for a new pcluster.
4228c2ecf20Sopenharmony_ci	 */
4238c2ecf20Sopenharmony_ci	DBG_BUGON(!mutex_trylock(&cl->lock));
4248c2ecf20Sopenharmony_ci
4258c2ecf20Sopenharmony_ci	grp = erofs_insert_workgroup(inode->i_sb, &pcl->obj);
4268c2ecf20Sopenharmony_ci	if (IS_ERR(grp)) {
4278c2ecf20Sopenharmony_ci		err = PTR_ERR(grp);
4288c2ecf20Sopenharmony_ci		goto err_out;
4298c2ecf20Sopenharmony_ci	}
4308c2ecf20Sopenharmony_ci
4318c2ecf20Sopenharmony_ci	if (grp != &pcl->obj) {
4328c2ecf20Sopenharmony_ci		clt->pcl = container_of(grp, struct z_erofs_pcluster, obj);
4338c2ecf20Sopenharmony_ci		err = -EEXIST;
4348c2ecf20Sopenharmony_ci		goto err_out;
4358c2ecf20Sopenharmony_ci	}
4368c2ecf20Sopenharmony_ci	/* used to check tail merging loop due to corrupted images */
4378c2ecf20Sopenharmony_ci	if (clt->owned_head == Z_EROFS_PCLUSTER_TAIL)
4388c2ecf20Sopenharmony_ci		clt->tailpcl = pcl;
4398c2ecf20Sopenharmony_ci	clt->owned_head = &pcl->next;
4408c2ecf20Sopenharmony_ci	clt->pcl = pcl;
4418c2ecf20Sopenharmony_ci	clt->cl = cl;
4428c2ecf20Sopenharmony_ci	return 0;
4438c2ecf20Sopenharmony_ci
4448c2ecf20Sopenharmony_cierr_out:
4458c2ecf20Sopenharmony_ci	mutex_unlock(&cl->lock);
4468c2ecf20Sopenharmony_ci	kmem_cache_free(pcluster_cachep, pcl);
4478c2ecf20Sopenharmony_ci	return err;
4488c2ecf20Sopenharmony_ci}
4498c2ecf20Sopenharmony_ci
4508c2ecf20Sopenharmony_cistatic int z_erofs_collector_begin(struct z_erofs_collector *clt,
4518c2ecf20Sopenharmony_ci				   struct inode *inode,
4528c2ecf20Sopenharmony_ci				   struct erofs_map_blocks *map)
4538c2ecf20Sopenharmony_ci{
4548c2ecf20Sopenharmony_ci	struct erofs_workgroup *grp;
4558c2ecf20Sopenharmony_ci	int ret;
4568c2ecf20Sopenharmony_ci
4578c2ecf20Sopenharmony_ci	DBG_BUGON(clt->cl);
4588c2ecf20Sopenharmony_ci
4598c2ecf20Sopenharmony_ci	/* must be Z_EROFS_PCLUSTER_TAIL or pointed to previous collection */
4608c2ecf20Sopenharmony_ci	DBG_BUGON(clt->owned_head == Z_EROFS_PCLUSTER_NIL);
4618c2ecf20Sopenharmony_ci	DBG_BUGON(clt->owned_head == Z_EROFS_PCLUSTER_TAIL_CLOSED);
4628c2ecf20Sopenharmony_ci
4638c2ecf20Sopenharmony_ci	if (!PAGE_ALIGNED(map->m_pa)) {
4648c2ecf20Sopenharmony_ci		DBG_BUGON(1);
4658c2ecf20Sopenharmony_ci		return -EINVAL;
4668c2ecf20Sopenharmony_ci	}
4678c2ecf20Sopenharmony_ci
4688c2ecf20Sopenharmony_ci	grp = erofs_find_workgroup(inode->i_sb, map->m_pa >> PAGE_SHIFT);
4698c2ecf20Sopenharmony_ci	if (grp) {
4708c2ecf20Sopenharmony_ci		clt->pcl = container_of(grp, struct z_erofs_pcluster, obj);
4718c2ecf20Sopenharmony_ci	} else {
4728c2ecf20Sopenharmony_ci		ret = z_erofs_register_collection(clt, inode, map);
4738c2ecf20Sopenharmony_ci
4748c2ecf20Sopenharmony_ci		if (!ret)
4758c2ecf20Sopenharmony_ci			goto out;
4768c2ecf20Sopenharmony_ci		if (ret != -EEXIST)
4778c2ecf20Sopenharmony_ci			return ret;
4788c2ecf20Sopenharmony_ci	}
4798c2ecf20Sopenharmony_ci
4808c2ecf20Sopenharmony_ci	ret = z_erofs_lookup_collection(clt, inode, map);
4818c2ecf20Sopenharmony_ci	if (ret) {
4828c2ecf20Sopenharmony_ci		erofs_workgroup_put(&clt->pcl->obj);
4838c2ecf20Sopenharmony_ci		return ret;
4848c2ecf20Sopenharmony_ci	}
4858c2ecf20Sopenharmony_ci
4868c2ecf20Sopenharmony_ciout:
4878c2ecf20Sopenharmony_ci	z_erofs_pagevec_ctor_init(&clt->vector, Z_EROFS_NR_INLINE_PAGEVECS,
4888c2ecf20Sopenharmony_ci				  clt->cl->pagevec, clt->cl->vcnt);
4898c2ecf20Sopenharmony_ci
4908c2ecf20Sopenharmony_ci	clt->compressedpages = clt->pcl->compressed_pages;
4918c2ecf20Sopenharmony_ci	if (clt->mode <= COLLECT_PRIMARY) /* cannot do in-place I/O */
4928c2ecf20Sopenharmony_ci		clt->compressedpages += Z_EROFS_CLUSTER_MAX_PAGES;
4938c2ecf20Sopenharmony_ci	return 0;
4948c2ecf20Sopenharmony_ci}
4958c2ecf20Sopenharmony_ci
/*
 * keep in mind that pclusters which are no longer referenced are
 * freed only after an RCU grace period.
 */
5008c2ecf20Sopenharmony_cistatic void z_erofs_rcu_callback(struct rcu_head *head)
5018c2ecf20Sopenharmony_ci{
5028c2ecf20Sopenharmony_ci	struct z_erofs_collection *const cl =
5038c2ecf20Sopenharmony_ci		container_of(head, struct z_erofs_collection, rcu);
5048c2ecf20Sopenharmony_ci
5058c2ecf20Sopenharmony_ci	kmem_cache_free(pcluster_cachep,
5068c2ecf20Sopenharmony_ci			container_of(cl, struct z_erofs_pcluster,
5078c2ecf20Sopenharmony_ci				     primary_collection));
5088c2ecf20Sopenharmony_ci}
5098c2ecf20Sopenharmony_ci
5108c2ecf20Sopenharmony_civoid erofs_workgroup_free_rcu(struct erofs_workgroup *grp)
5118c2ecf20Sopenharmony_ci{
5128c2ecf20Sopenharmony_ci	struct z_erofs_pcluster *const pcl =
5138c2ecf20Sopenharmony_ci		container_of(grp, struct z_erofs_pcluster, obj);
5148c2ecf20Sopenharmony_ci	struct z_erofs_collection *const cl = z_erofs_primarycollection(pcl);
5158c2ecf20Sopenharmony_ci
5168c2ecf20Sopenharmony_ci	call_rcu(&cl->rcu, z_erofs_rcu_callback);
5178c2ecf20Sopenharmony_ci}
5188c2ecf20Sopenharmony_ci
5198c2ecf20Sopenharmony_cistatic void z_erofs_collection_put(struct z_erofs_collection *cl)
5208c2ecf20Sopenharmony_ci{
5218c2ecf20Sopenharmony_ci	struct z_erofs_pcluster *const pcl =
5228c2ecf20Sopenharmony_ci		container_of(cl, struct z_erofs_pcluster, primary_collection);
5238c2ecf20Sopenharmony_ci
5248c2ecf20Sopenharmony_ci	erofs_workgroup_put(&pcl->obj);
5258c2ecf20Sopenharmony_ci}
5268c2ecf20Sopenharmony_ci
5278c2ecf20Sopenharmony_cistatic bool z_erofs_collector_end(struct z_erofs_collector *clt)
5288c2ecf20Sopenharmony_ci{
5298c2ecf20Sopenharmony_ci	struct z_erofs_collection *cl = clt->cl;
5308c2ecf20Sopenharmony_ci
5318c2ecf20Sopenharmony_ci	if (!cl)
5328c2ecf20Sopenharmony_ci		return false;
5338c2ecf20Sopenharmony_ci
5348c2ecf20Sopenharmony_ci	z_erofs_pagevec_ctor_exit(&clt->vector, false);
5358c2ecf20Sopenharmony_ci	mutex_unlock(&cl->lock);
5368c2ecf20Sopenharmony_ci
5378c2ecf20Sopenharmony_ci	/*
5388c2ecf20Sopenharmony_ci	 * if all pending pages are added, don't hold its reference
5398c2ecf20Sopenharmony_ci	 * any longer if the pcluster isn't hosted by ourselves.
5408c2ecf20Sopenharmony_ci	 */
5418c2ecf20Sopenharmony_ci	if (clt->mode < COLLECT_PRIMARY_FOLLOWED_NOINPLACE)
5428c2ecf20Sopenharmony_ci		z_erofs_collection_put(cl);
5438c2ecf20Sopenharmony_ci
5448c2ecf20Sopenharmony_ci	clt->cl = NULL;
5458c2ecf20Sopenharmony_ci	return true;
5468c2ecf20Sopenharmony_ci}
5478c2ecf20Sopenharmony_ci
5488c2ecf20Sopenharmony_cistatic bool should_alloc_managed_pages(struct z_erofs_decompress_frontend *fe,
5498c2ecf20Sopenharmony_ci				       unsigned int cachestrategy,
5508c2ecf20Sopenharmony_ci				       erofs_off_t la)
5518c2ecf20Sopenharmony_ci{
5528c2ecf20Sopenharmony_ci	if (cachestrategy <= EROFS_ZIP_CACHE_DISABLED)
5538c2ecf20Sopenharmony_ci		return false;
5548c2ecf20Sopenharmony_ci
5558c2ecf20Sopenharmony_ci	if (fe->backmost)
5568c2ecf20Sopenharmony_ci		return true;
5578c2ecf20Sopenharmony_ci
5588c2ecf20Sopenharmony_ci	return cachestrategy >= EROFS_ZIP_CACHE_READAROUND &&
5598c2ecf20Sopenharmony_ci		la < fe->headoffset;
5608c2ecf20Sopenharmony_ci}
5618c2ecf20Sopenharmony_ci
5628c2ecf20Sopenharmony_cistatic int z_erofs_do_read_page(struct z_erofs_decompress_frontend *fe,
5638c2ecf20Sopenharmony_ci				struct page *page)
5648c2ecf20Sopenharmony_ci{
5658c2ecf20Sopenharmony_ci	struct inode *const inode = fe->inode;
5668c2ecf20Sopenharmony_ci	struct erofs_sb_info *const sbi = EROFS_I_SB(inode);
5678c2ecf20Sopenharmony_ci	struct erofs_map_blocks *const map = &fe->map;
5688c2ecf20Sopenharmony_ci	struct z_erofs_collector *const clt = &fe->clt;
5698c2ecf20Sopenharmony_ci	const loff_t offset = page_offset(page);
5708c2ecf20Sopenharmony_ci	bool tight = true;
5718c2ecf20Sopenharmony_ci
5728c2ecf20Sopenharmony_ci	enum z_erofs_cache_alloctype cache_strategy;
5738c2ecf20Sopenharmony_ci	enum z_erofs_page_type page_type;
5748c2ecf20Sopenharmony_ci	unsigned int cur, end, spiltted, index;
5758c2ecf20Sopenharmony_ci	int err = 0;
5768c2ecf20Sopenharmony_ci
5778c2ecf20Sopenharmony_ci	/* register locked file pages as online pages in pack */
5788c2ecf20Sopenharmony_ci	z_erofs_onlinepage_init(page);
5798c2ecf20Sopenharmony_ci
5808c2ecf20Sopenharmony_ci	spiltted = 0;
5818c2ecf20Sopenharmony_ci	end = PAGE_SIZE;
5828c2ecf20Sopenharmony_cirepeat:
5838c2ecf20Sopenharmony_ci	cur = end - 1;
5848c2ecf20Sopenharmony_ci
5858c2ecf20Sopenharmony_ci	/* lucky, within the range of the current map_blocks */
5868c2ecf20Sopenharmony_ci	if (offset + cur >= map->m_la &&
5878c2ecf20Sopenharmony_ci	    offset + cur < map->m_la + map->m_llen) {
5888c2ecf20Sopenharmony_ci		/* didn't get a valid collection previously (very rare) */
5898c2ecf20Sopenharmony_ci		if (!clt->cl)
5908c2ecf20Sopenharmony_ci			goto restart_now;
5918c2ecf20Sopenharmony_ci		goto hitted;
5928c2ecf20Sopenharmony_ci	}
5938c2ecf20Sopenharmony_ci
5948c2ecf20Sopenharmony_ci	/* go ahead the next map_blocks */
5958c2ecf20Sopenharmony_ci	erofs_dbg("%s: [out-of-range] pos %llu", __func__, offset + cur);
5968c2ecf20Sopenharmony_ci
5978c2ecf20Sopenharmony_ci	if (z_erofs_collector_end(clt))
5988c2ecf20Sopenharmony_ci		fe->backmost = false;
5998c2ecf20Sopenharmony_ci
6008c2ecf20Sopenharmony_ci	map->m_la = offset + cur;
6018c2ecf20Sopenharmony_ci	map->m_llen = 0;
6028c2ecf20Sopenharmony_ci	err = z_erofs_map_blocks_iter(inode, map, 0);
6038c2ecf20Sopenharmony_ci	if (err)
6048c2ecf20Sopenharmony_ci		goto err_out;
6058c2ecf20Sopenharmony_ci
6068c2ecf20Sopenharmony_cirestart_now:
6078c2ecf20Sopenharmony_ci	if (!(map->m_flags & EROFS_MAP_MAPPED))
6088c2ecf20Sopenharmony_ci		goto hitted;
6098c2ecf20Sopenharmony_ci
6108c2ecf20Sopenharmony_ci	err = z_erofs_collector_begin(clt, inode, map);
6118c2ecf20Sopenharmony_ci	if (err)
6128c2ecf20Sopenharmony_ci		goto err_out;
6138c2ecf20Sopenharmony_ci
6148c2ecf20Sopenharmony_ci	/* preload all compressed pages (maybe downgrade role if necessary) */
6158c2ecf20Sopenharmony_ci	if (should_alloc_managed_pages(fe, sbi->ctx.cache_strategy, map->m_la))
6168c2ecf20Sopenharmony_ci		cache_strategy = DELAYEDALLOC;
6178c2ecf20Sopenharmony_ci	else
6188c2ecf20Sopenharmony_ci		cache_strategy = DONTALLOC;
6198c2ecf20Sopenharmony_ci
6208c2ecf20Sopenharmony_ci	preload_compressed_pages(clt, MNGD_MAPPING(sbi), cache_strategy);
6218c2ecf20Sopenharmony_ci
6228c2ecf20Sopenharmony_cihitted:
6238c2ecf20Sopenharmony_ci	/*
6248c2ecf20Sopenharmony_ci	 * Ensure the current partial page belongs to this submit chain rather
6258c2ecf20Sopenharmony_ci	 * than other concurrent submit chains or the noio(bypass) chain since
6268c2ecf20Sopenharmony_ci	 * those chains are handled asynchronously thus the page cannot be used
6278c2ecf20Sopenharmony_ci	 * for inplace I/O or pagevec (should be processed in strict order.)
6288c2ecf20Sopenharmony_ci	 */
6298c2ecf20Sopenharmony_ci	tight &= (clt->mode >= COLLECT_PRIMARY_HOOKED &&
6308c2ecf20Sopenharmony_ci		  clt->mode != COLLECT_PRIMARY_FOLLOWED_NOINPLACE);
6318c2ecf20Sopenharmony_ci
6328c2ecf20Sopenharmony_ci	cur = end - min_t(erofs_off_t, offset + end - map->m_la, end);
6338c2ecf20Sopenharmony_ci	if (!(map->m_flags & EROFS_MAP_MAPPED)) {
6348c2ecf20Sopenharmony_ci		zero_user_segment(page, cur, end);
6358c2ecf20Sopenharmony_ci		++spiltted;
6368c2ecf20Sopenharmony_ci		tight = false;
6378c2ecf20Sopenharmony_ci		goto next_part;
6388c2ecf20Sopenharmony_ci	}
6398c2ecf20Sopenharmony_ci
6408c2ecf20Sopenharmony_ci	/* let's derive page type */
6418c2ecf20Sopenharmony_ci	page_type = cur ? Z_EROFS_VLE_PAGE_TYPE_HEAD :
6428c2ecf20Sopenharmony_ci		(!spiltted ? Z_EROFS_PAGE_TYPE_EXCLUSIVE :
6438c2ecf20Sopenharmony_ci			(tight ? Z_EROFS_PAGE_TYPE_EXCLUSIVE :
6448c2ecf20Sopenharmony_ci				Z_EROFS_VLE_PAGE_TYPE_TAIL_SHARED));
6458c2ecf20Sopenharmony_ci
6468c2ecf20Sopenharmony_ci	if (cur)
6478c2ecf20Sopenharmony_ci		tight &= (clt->mode >= COLLECT_PRIMARY_FOLLOWED);
6488c2ecf20Sopenharmony_ci
6498c2ecf20Sopenharmony_ciretry:
6508c2ecf20Sopenharmony_ci	err = z_erofs_attach_page(clt, page, page_type,
6518c2ecf20Sopenharmony_ci				  clt->mode >= COLLECT_PRIMARY_FOLLOWED);
6528c2ecf20Sopenharmony_ci	/* should allocate an additional staging page for pagevec */
6538c2ecf20Sopenharmony_ci	if (err == -EAGAIN) {
6548c2ecf20Sopenharmony_ci		struct page *const newpage =
6558c2ecf20Sopenharmony_ci				alloc_page(GFP_NOFS | __GFP_NOFAIL);
6568c2ecf20Sopenharmony_ci
6578c2ecf20Sopenharmony_ci		newpage->mapping = Z_EROFS_MAPPING_STAGING;
6588c2ecf20Sopenharmony_ci		err = z_erofs_attach_page(clt, newpage,
6598c2ecf20Sopenharmony_ci					  Z_EROFS_PAGE_TYPE_EXCLUSIVE, true);
6608c2ecf20Sopenharmony_ci		if (!err)
6618c2ecf20Sopenharmony_ci			goto retry;
6628c2ecf20Sopenharmony_ci	}
6638c2ecf20Sopenharmony_ci
6648c2ecf20Sopenharmony_ci	if (err)
6658c2ecf20Sopenharmony_ci		goto err_out;
6668c2ecf20Sopenharmony_ci
6678c2ecf20Sopenharmony_ci	index = page->index - (map->m_la >> PAGE_SHIFT);
6688c2ecf20Sopenharmony_ci
6698c2ecf20Sopenharmony_ci	z_erofs_onlinepage_fixup(page, index, true);
6708c2ecf20Sopenharmony_ci
6718c2ecf20Sopenharmony_ci	/* bump up the number of spiltted parts of a page */
6728c2ecf20Sopenharmony_ci	++spiltted;
6738c2ecf20Sopenharmony_ci	/* also update nr_pages */
6748c2ecf20Sopenharmony_ci	clt->cl->nr_pages = max_t(pgoff_t, clt->cl->nr_pages, index + 1);
6758c2ecf20Sopenharmony_cinext_part:
6768c2ecf20Sopenharmony_ci	/* can be used for verification */
6778c2ecf20Sopenharmony_ci	map->m_llen = offset + cur - map->m_la;
6788c2ecf20Sopenharmony_ci
6798c2ecf20Sopenharmony_ci	end = cur;
6808c2ecf20Sopenharmony_ci	if (end > 0)
6818c2ecf20Sopenharmony_ci		goto repeat;
6828c2ecf20Sopenharmony_ci
6838c2ecf20Sopenharmony_ciout:
6848c2ecf20Sopenharmony_ci	z_erofs_onlinepage_endio(page);
6858c2ecf20Sopenharmony_ci
6868c2ecf20Sopenharmony_ci	erofs_dbg("%s, finish page: %pK spiltted: %u map->m_llen %llu",
6878c2ecf20Sopenharmony_ci		  __func__, page, spiltted, map->m_llen);
6888c2ecf20Sopenharmony_ci	return err;
6898c2ecf20Sopenharmony_ci
6908c2ecf20Sopenharmony_ci	/* if some error occurred while processing this page */
6918c2ecf20Sopenharmony_cierr_out:
6928c2ecf20Sopenharmony_ci	SetPageError(page);
6938c2ecf20Sopenharmony_ci	goto out;
6948c2ecf20Sopenharmony_ci}
6958c2ecf20Sopenharmony_ci
6968c2ecf20Sopenharmony_cistatic void z_erofs_decompress_kickoff(struct z_erofs_decompressqueue *io,
6978c2ecf20Sopenharmony_ci				       bool sync, int bios)
6988c2ecf20Sopenharmony_ci{
6998c2ecf20Sopenharmony_ci	/* wake up the caller thread for sync decompression */
7008c2ecf20Sopenharmony_ci	if (sync) {
7018c2ecf20Sopenharmony_ci		unsigned long flags;
7028c2ecf20Sopenharmony_ci
7038c2ecf20Sopenharmony_ci		spin_lock_irqsave(&io->u.wait.lock, flags);
7048c2ecf20Sopenharmony_ci		if (!atomic_add_return(bios, &io->pending_bios))
7058c2ecf20Sopenharmony_ci			wake_up_locked(&io->u.wait);
7068c2ecf20Sopenharmony_ci		spin_unlock_irqrestore(&io->u.wait.lock, flags);
7078c2ecf20Sopenharmony_ci		return;
7088c2ecf20Sopenharmony_ci	}
7098c2ecf20Sopenharmony_ci
7108c2ecf20Sopenharmony_ci	if (!atomic_add_return(bios, &io->pending_bios))
7118c2ecf20Sopenharmony_ci		queue_work(z_erofs_workqueue, &io->u.work);
7128c2ecf20Sopenharmony_ci}
7138c2ecf20Sopenharmony_ci
7148c2ecf20Sopenharmony_cistatic void z_erofs_decompressqueue_endio(struct bio *bio)
7158c2ecf20Sopenharmony_ci{
7168c2ecf20Sopenharmony_ci	tagptr1_t t = tagptr_init(tagptr1_t, bio->bi_private);
7178c2ecf20Sopenharmony_ci	struct z_erofs_decompressqueue *q = tagptr_unfold_ptr(t);
7188c2ecf20Sopenharmony_ci	blk_status_t err = bio->bi_status;
7198c2ecf20Sopenharmony_ci	struct bio_vec *bvec;
7208c2ecf20Sopenharmony_ci	struct bvec_iter_all iter_all;
7218c2ecf20Sopenharmony_ci
7228c2ecf20Sopenharmony_ci	bio_for_each_segment_all(bvec, bio, iter_all) {
7238c2ecf20Sopenharmony_ci		struct page *page = bvec->bv_page;
7248c2ecf20Sopenharmony_ci
7258c2ecf20Sopenharmony_ci		DBG_BUGON(PageUptodate(page));
7268c2ecf20Sopenharmony_ci		DBG_BUGON(!page->mapping);
7278c2ecf20Sopenharmony_ci
7288c2ecf20Sopenharmony_ci		if (err)
7298c2ecf20Sopenharmony_ci			SetPageError(page);
7308c2ecf20Sopenharmony_ci
7318c2ecf20Sopenharmony_ci		if (erofs_page_is_managed(EROFS_SB(q->sb), page)) {
7328c2ecf20Sopenharmony_ci			if (!err)
7338c2ecf20Sopenharmony_ci				SetPageUptodate(page);
7348c2ecf20Sopenharmony_ci			unlock_page(page);
7358c2ecf20Sopenharmony_ci		}
7368c2ecf20Sopenharmony_ci	}
7378c2ecf20Sopenharmony_ci	z_erofs_decompress_kickoff(q, tagptr_unfold_tags(t), -1);
7388c2ecf20Sopenharmony_ci	bio_put(bio);
7398c2ecf20Sopenharmony_ci}
7408c2ecf20Sopenharmony_ci
7418c2ecf20Sopenharmony_cistatic int z_erofs_decompress_pcluster(struct super_block *sb,
7428c2ecf20Sopenharmony_ci				       struct z_erofs_pcluster *pcl,
7438c2ecf20Sopenharmony_ci				       struct list_head *pagepool)
7448c2ecf20Sopenharmony_ci{
7458c2ecf20Sopenharmony_ci	struct erofs_sb_info *const sbi = EROFS_SB(sb);
7468c2ecf20Sopenharmony_ci	const unsigned int clusterpages = BIT(pcl->clusterbits);
7478c2ecf20Sopenharmony_ci	struct z_erofs_pagevec_ctor ctor;
7488c2ecf20Sopenharmony_ci	unsigned int i, outputsize, llen, nr_pages;
7498c2ecf20Sopenharmony_ci	struct page *pages_onstack[Z_EROFS_VMAP_ONSTACK_PAGES];
7508c2ecf20Sopenharmony_ci	struct page **pages, **compressed_pages, *page;
7518c2ecf20Sopenharmony_ci
7528c2ecf20Sopenharmony_ci	enum z_erofs_page_type page_type;
7538c2ecf20Sopenharmony_ci	bool overlapped, partial;
7548c2ecf20Sopenharmony_ci	struct z_erofs_collection *cl;
7558c2ecf20Sopenharmony_ci	int err;
7568c2ecf20Sopenharmony_ci
7578c2ecf20Sopenharmony_ci	might_sleep();
7588c2ecf20Sopenharmony_ci	cl = z_erofs_primarycollection(pcl);
7598c2ecf20Sopenharmony_ci	DBG_BUGON(!READ_ONCE(cl->nr_pages));
7608c2ecf20Sopenharmony_ci
7618c2ecf20Sopenharmony_ci	mutex_lock(&cl->lock);
7628c2ecf20Sopenharmony_ci	nr_pages = cl->nr_pages;
7638c2ecf20Sopenharmony_ci
7648c2ecf20Sopenharmony_ci	if (nr_pages <= Z_EROFS_VMAP_ONSTACK_PAGES) {
7658c2ecf20Sopenharmony_ci		pages = pages_onstack;
7668c2ecf20Sopenharmony_ci	} else if (nr_pages <= Z_EROFS_VMAP_GLOBAL_PAGES &&
7678c2ecf20Sopenharmony_ci		   mutex_trylock(&z_pagemap_global_lock)) {
7688c2ecf20Sopenharmony_ci		pages = z_pagemap_global;
7698c2ecf20Sopenharmony_ci	} else {
7708c2ecf20Sopenharmony_ci		gfp_t gfp_flags = GFP_KERNEL;
7718c2ecf20Sopenharmony_ci
7728c2ecf20Sopenharmony_ci		if (nr_pages > Z_EROFS_VMAP_GLOBAL_PAGES)
7738c2ecf20Sopenharmony_ci			gfp_flags |= __GFP_NOFAIL;
7748c2ecf20Sopenharmony_ci
7758c2ecf20Sopenharmony_ci		pages = kvmalloc_array(nr_pages, sizeof(struct page *),
7768c2ecf20Sopenharmony_ci				       gfp_flags);
7778c2ecf20Sopenharmony_ci
7788c2ecf20Sopenharmony_ci		/* fallback to global pagemap for the lowmem scenario */
7798c2ecf20Sopenharmony_ci		if (!pages) {
7808c2ecf20Sopenharmony_ci			mutex_lock(&z_pagemap_global_lock);
7818c2ecf20Sopenharmony_ci			pages = z_pagemap_global;
7828c2ecf20Sopenharmony_ci		}
7838c2ecf20Sopenharmony_ci	}
7848c2ecf20Sopenharmony_ci
7858c2ecf20Sopenharmony_ci	for (i = 0; i < nr_pages; ++i)
7868c2ecf20Sopenharmony_ci		pages[i] = NULL;
7878c2ecf20Sopenharmony_ci
7888c2ecf20Sopenharmony_ci	err = 0;
7898c2ecf20Sopenharmony_ci	z_erofs_pagevec_ctor_init(&ctor, Z_EROFS_NR_INLINE_PAGEVECS,
7908c2ecf20Sopenharmony_ci				  cl->pagevec, 0);
7918c2ecf20Sopenharmony_ci
7928c2ecf20Sopenharmony_ci	for (i = 0; i < cl->vcnt; ++i) {
7938c2ecf20Sopenharmony_ci		unsigned int pagenr;
7948c2ecf20Sopenharmony_ci
7958c2ecf20Sopenharmony_ci		page = z_erofs_pagevec_dequeue(&ctor, &page_type);
7968c2ecf20Sopenharmony_ci
7978c2ecf20Sopenharmony_ci		/* all pages in pagevec ought to be valid */
7988c2ecf20Sopenharmony_ci		DBG_BUGON(!page);
7998c2ecf20Sopenharmony_ci		DBG_BUGON(!page->mapping);
8008c2ecf20Sopenharmony_ci
8018c2ecf20Sopenharmony_ci		if (z_erofs_put_stagingpage(pagepool, page))
8028c2ecf20Sopenharmony_ci			continue;
8038c2ecf20Sopenharmony_ci
8048c2ecf20Sopenharmony_ci		if (page_type == Z_EROFS_VLE_PAGE_TYPE_HEAD)
8058c2ecf20Sopenharmony_ci			pagenr = 0;
8068c2ecf20Sopenharmony_ci		else
8078c2ecf20Sopenharmony_ci			pagenr = z_erofs_onlinepage_index(page);
8088c2ecf20Sopenharmony_ci
8098c2ecf20Sopenharmony_ci		DBG_BUGON(pagenr >= nr_pages);
8108c2ecf20Sopenharmony_ci
8118c2ecf20Sopenharmony_ci		/*
8128c2ecf20Sopenharmony_ci		 * currently EROFS doesn't support multiref(dedup),
8138c2ecf20Sopenharmony_ci		 * so here erroring out one multiref page.
8148c2ecf20Sopenharmony_ci		 */
8158c2ecf20Sopenharmony_ci		if (pages[pagenr]) {
8168c2ecf20Sopenharmony_ci			DBG_BUGON(1);
8178c2ecf20Sopenharmony_ci			SetPageError(pages[pagenr]);
8188c2ecf20Sopenharmony_ci			z_erofs_onlinepage_endio(pages[pagenr]);
8198c2ecf20Sopenharmony_ci			err = -EFSCORRUPTED;
8208c2ecf20Sopenharmony_ci		}
8218c2ecf20Sopenharmony_ci		pages[pagenr] = page;
8228c2ecf20Sopenharmony_ci	}
8238c2ecf20Sopenharmony_ci	z_erofs_pagevec_ctor_exit(&ctor, true);
8248c2ecf20Sopenharmony_ci
8258c2ecf20Sopenharmony_ci	overlapped = false;
8268c2ecf20Sopenharmony_ci	compressed_pages = pcl->compressed_pages;
8278c2ecf20Sopenharmony_ci
8288c2ecf20Sopenharmony_ci	for (i = 0; i < clusterpages; ++i) {
8298c2ecf20Sopenharmony_ci		unsigned int pagenr;
8308c2ecf20Sopenharmony_ci
8318c2ecf20Sopenharmony_ci		page = compressed_pages[i];
8328c2ecf20Sopenharmony_ci
8338c2ecf20Sopenharmony_ci		/* all compressed pages ought to be valid */
8348c2ecf20Sopenharmony_ci		DBG_BUGON(!page);
8358c2ecf20Sopenharmony_ci		DBG_BUGON(!page->mapping);
8368c2ecf20Sopenharmony_ci
8378c2ecf20Sopenharmony_ci		if (!z_erofs_page_is_staging(page)) {
8388c2ecf20Sopenharmony_ci			if (erofs_page_is_managed(sbi, page)) {
8398c2ecf20Sopenharmony_ci				if (!PageUptodate(page))
8408c2ecf20Sopenharmony_ci					err = -EIO;
8418c2ecf20Sopenharmony_ci				continue;
8428c2ecf20Sopenharmony_ci			}
8438c2ecf20Sopenharmony_ci
8448c2ecf20Sopenharmony_ci			/*
8458c2ecf20Sopenharmony_ci			 * only if non-head page can be selected
8468c2ecf20Sopenharmony_ci			 * for inplace decompression
8478c2ecf20Sopenharmony_ci			 */
8488c2ecf20Sopenharmony_ci			pagenr = z_erofs_onlinepage_index(page);
8498c2ecf20Sopenharmony_ci
8508c2ecf20Sopenharmony_ci			DBG_BUGON(pagenr >= nr_pages);
8518c2ecf20Sopenharmony_ci			if (pages[pagenr]) {
8528c2ecf20Sopenharmony_ci				DBG_BUGON(1);
8538c2ecf20Sopenharmony_ci				SetPageError(pages[pagenr]);
8548c2ecf20Sopenharmony_ci				z_erofs_onlinepage_endio(pages[pagenr]);
8558c2ecf20Sopenharmony_ci				err = -EFSCORRUPTED;
8568c2ecf20Sopenharmony_ci			}
8578c2ecf20Sopenharmony_ci			pages[pagenr] = page;
8588c2ecf20Sopenharmony_ci
8598c2ecf20Sopenharmony_ci			overlapped = true;
8608c2ecf20Sopenharmony_ci		}
8618c2ecf20Sopenharmony_ci
8628c2ecf20Sopenharmony_ci		/* PG_error needs checking for inplaced and staging pages */
8638c2ecf20Sopenharmony_ci		if (PageError(page)) {
8648c2ecf20Sopenharmony_ci			DBG_BUGON(PageUptodate(page));
8658c2ecf20Sopenharmony_ci			err = -EIO;
8668c2ecf20Sopenharmony_ci		}
8678c2ecf20Sopenharmony_ci	}
8688c2ecf20Sopenharmony_ci
8698c2ecf20Sopenharmony_ci	if (err)
8708c2ecf20Sopenharmony_ci		goto out;
8718c2ecf20Sopenharmony_ci
8728c2ecf20Sopenharmony_ci	llen = pcl->length >> Z_EROFS_PCLUSTER_LENGTH_BIT;
8738c2ecf20Sopenharmony_ci	if (nr_pages << PAGE_SHIFT >= cl->pageofs + llen) {
8748c2ecf20Sopenharmony_ci		outputsize = llen;
8758c2ecf20Sopenharmony_ci		partial = !(pcl->length & Z_EROFS_PCLUSTER_FULL_LENGTH);
8768c2ecf20Sopenharmony_ci	} else {
8778c2ecf20Sopenharmony_ci		outputsize = (nr_pages << PAGE_SHIFT) - cl->pageofs;
8788c2ecf20Sopenharmony_ci		partial = true;
8798c2ecf20Sopenharmony_ci	}
8808c2ecf20Sopenharmony_ci
8818c2ecf20Sopenharmony_ci	err = z_erofs_decompress(&(struct z_erofs_decompress_req) {
8828c2ecf20Sopenharmony_ci					.sb = sb,
8838c2ecf20Sopenharmony_ci					.in = compressed_pages,
8848c2ecf20Sopenharmony_ci					.out = pages,
8858c2ecf20Sopenharmony_ci					.pageofs_out = cl->pageofs,
8868c2ecf20Sopenharmony_ci					.inputsize = PAGE_SIZE,
8878c2ecf20Sopenharmony_ci					.outputsize = outputsize,
8888c2ecf20Sopenharmony_ci					.alg = pcl->algorithmformat,
8898c2ecf20Sopenharmony_ci					.inplace_io = overlapped,
8908c2ecf20Sopenharmony_ci					.partial_decoding = partial
8918c2ecf20Sopenharmony_ci				 }, pagepool);
8928c2ecf20Sopenharmony_ci
8938c2ecf20Sopenharmony_ciout:
8948c2ecf20Sopenharmony_ci	/* must handle all compressed pages before endding pages */
8958c2ecf20Sopenharmony_ci	for (i = 0; i < clusterpages; ++i) {
8968c2ecf20Sopenharmony_ci		page = compressed_pages[i];
8978c2ecf20Sopenharmony_ci
8988c2ecf20Sopenharmony_ci		if (erofs_page_is_managed(sbi, page))
8998c2ecf20Sopenharmony_ci			continue;
9008c2ecf20Sopenharmony_ci
9018c2ecf20Sopenharmony_ci		/* recycle all individual staging pages */
9028c2ecf20Sopenharmony_ci		(void)z_erofs_put_stagingpage(pagepool, page);
9038c2ecf20Sopenharmony_ci
9048c2ecf20Sopenharmony_ci		WRITE_ONCE(compressed_pages[i], NULL);
9058c2ecf20Sopenharmony_ci	}
9068c2ecf20Sopenharmony_ci
9078c2ecf20Sopenharmony_ci	for (i = 0; i < nr_pages; ++i) {
9088c2ecf20Sopenharmony_ci		page = pages[i];
9098c2ecf20Sopenharmony_ci		if (!page)
9108c2ecf20Sopenharmony_ci			continue;
9118c2ecf20Sopenharmony_ci
9128c2ecf20Sopenharmony_ci		DBG_BUGON(!page->mapping);
9138c2ecf20Sopenharmony_ci
9148c2ecf20Sopenharmony_ci		/* recycle all individual staging pages */
9158c2ecf20Sopenharmony_ci		if (z_erofs_put_stagingpage(pagepool, page))
9168c2ecf20Sopenharmony_ci			continue;
9178c2ecf20Sopenharmony_ci
9188c2ecf20Sopenharmony_ci		if (err < 0)
9198c2ecf20Sopenharmony_ci			SetPageError(page);
9208c2ecf20Sopenharmony_ci
9218c2ecf20Sopenharmony_ci		z_erofs_onlinepage_endio(page);
9228c2ecf20Sopenharmony_ci	}
9238c2ecf20Sopenharmony_ci
9248c2ecf20Sopenharmony_ci	if (pages == z_pagemap_global)
9258c2ecf20Sopenharmony_ci		mutex_unlock(&z_pagemap_global_lock);
9268c2ecf20Sopenharmony_ci	else if (pages != pages_onstack)
9278c2ecf20Sopenharmony_ci		kvfree(pages);
9288c2ecf20Sopenharmony_ci
9298c2ecf20Sopenharmony_ci	cl->nr_pages = 0;
9308c2ecf20Sopenharmony_ci	cl->vcnt = 0;
9318c2ecf20Sopenharmony_ci
9328c2ecf20Sopenharmony_ci	/* all cl locks MUST be taken before the following line */
9338c2ecf20Sopenharmony_ci	WRITE_ONCE(pcl->next, Z_EROFS_PCLUSTER_NIL);
9348c2ecf20Sopenharmony_ci
9358c2ecf20Sopenharmony_ci	/* all cl locks SHOULD be released right now */
9368c2ecf20Sopenharmony_ci	mutex_unlock(&cl->lock);
9378c2ecf20Sopenharmony_ci
9388c2ecf20Sopenharmony_ci	z_erofs_collection_put(cl);
9398c2ecf20Sopenharmony_ci	return err;
9408c2ecf20Sopenharmony_ci}
9418c2ecf20Sopenharmony_ci
9428c2ecf20Sopenharmony_cistatic void z_erofs_decompress_queue(const struct z_erofs_decompressqueue *io,
9438c2ecf20Sopenharmony_ci				     struct list_head *pagepool)
9448c2ecf20Sopenharmony_ci{
9458c2ecf20Sopenharmony_ci	z_erofs_next_pcluster_t owned = io->head;
9468c2ecf20Sopenharmony_ci
9478c2ecf20Sopenharmony_ci	while (owned != Z_EROFS_PCLUSTER_TAIL_CLOSED) {
9488c2ecf20Sopenharmony_ci		struct z_erofs_pcluster *pcl;
9498c2ecf20Sopenharmony_ci
9508c2ecf20Sopenharmony_ci		/* no possible that 'owned' equals Z_EROFS_WORK_TPTR_TAIL */
9518c2ecf20Sopenharmony_ci		DBG_BUGON(owned == Z_EROFS_PCLUSTER_TAIL);
9528c2ecf20Sopenharmony_ci
9538c2ecf20Sopenharmony_ci		/* no possible that 'owned' equals NULL */
9548c2ecf20Sopenharmony_ci		DBG_BUGON(owned == Z_EROFS_PCLUSTER_NIL);
9558c2ecf20Sopenharmony_ci
9568c2ecf20Sopenharmony_ci		pcl = container_of(owned, struct z_erofs_pcluster, next);
9578c2ecf20Sopenharmony_ci		owned = READ_ONCE(pcl->next);
9588c2ecf20Sopenharmony_ci
9598c2ecf20Sopenharmony_ci		z_erofs_decompress_pcluster(io->sb, pcl, pagepool);
9608c2ecf20Sopenharmony_ci	}
9618c2ecf20Sopenharmony_ci}
9628c2ecf20Sopenharmony_ci
9638c2ecf20Sopenharmony_cistatic void z_erofs_decompressqueue_work(struct work_struct *work)
9648c2ecf20Sopenharmony_ci{
9658c2ecf20Sopenharmony_ci	struct z_erofs_decompressqueue *bgq =
9668c2ecf20Sopenharmony_ci		container_of(work, struct z_erofs_decompressqueue, u.work);
9678c2ecf20Sopenharmony_ci	LIST_HEAD(pagepool);
9688c2ecf20Sopenharmony_ci
9698c2ecf20Sopenharmony_ci	DBG_BUGON(bgq->head == Z_EROFS_PCLUSTER_TAIL_CLOSED);
9708c2ecf20Sopenharmony_ci	z_erofs_decompress_queue(bgq, &pagepool);
9718c2ecf20Sopenharmony_ci
9728c2ecf20Sopenharmony_ci	put_pages_list(&pagepool);
9738c2ecf20Sopenharmony_ci	kvfree(bgq);
9748c2ecf20Sopenharmony_ci}
9758c2ecf20Sopenharmony_ci
9768c2ecf20Sopenharmony_cistatic struct page *pickup_page_for_submission(struct z_erofs_pcluster *pcl,
9778c2ecf20Sopenharmony_ci					       unsigned int nr,
9788c2ecf20Sopenharmony_ci					       struct list_head *pagepool,
9798c2ecf20Sopenharmony_ci					       struct address_space *mc,
9808c2ecf20Sopenharmony_ci					       gfp_t gfp)
9818c2ecf20Sopenharmony_ci{
9828c2ecf20Sopenharmony_ci	const pgoff_t index = pcl->obj.index;
9838c2ecf20Sopenharmony_ci	bool tocache = false;
9848c2ecf20Sopenharmony_ci
9858c2ecf20Sopenharmony_ci	struct address_space *mapping;
9868c2ecf20Sopenharmony_ci	struct page *oldpage, *page;
9878c2ecf20Sopenharmony_ci
9888c2ecf20Sopenharmony_ci	compressed_page_t t;
9898c2ecf20Sopenharmony_ci	int justfound;
9908c2ecf20Sopenharmony_ci
9918c2ecf20Sopenharmony_cirepeat:
9928c2ecf20Sopenharmony_ci	page = READ_ONCE(pcl->compressed_pages[nr]);
9938c2ecf20Sopenharmony_ci	oldpage = page;
9948c2ecf20Sopenharmony_ci
9958c2ecf20Sopenharmony_ci	if (!page)
9968c2ecf20Sopenharmony_ci		goto out_allocpage;
9978c2ecf20Sopenharmony_ci
9988c2ecf20Sopenharmony_ci	/*
9998c2ecf20Sopenharmony_ci	 * the cached page has not been allocated and
10008c2ecf20Sopenharmony_ci	 * an placeholder is out there, prepare it now.
10018c2ecf20Sopenharmony_ci	 */
10028c2ecf20Sopenharmony_ci	if (page == PAGE_UNALLOCATED) {
10038c2ecf20Sopenharmony_ci		tocache = true;
10048c2ecf20Sopenharmony_ci		goto out_allocpage;
10058c2ecf20Sopenharmony_ci	}
10068c2ecf20Sopenharmony_ci
10078c2ecf20Sopenharmony_ci	/* process the target tagged pointer */
10088c2ecf20Sopenharmony_ci	t = tagptr_init(compressed_page_t, page);
10098c2ecf20Sopenharmony_ci	justfound = tagptr_unfold_tags(t);
10108c2ecf20Sopenharmony_ci	page = tagptr_unfold_ptr(t);
10118c2ecf20Sopenharmony_ci
10128c2ecf20Sopenharmony_ci	mapping = READ_ONCE(page->mapping);
10138c2ecf20Sopenharmony_ci
10148c2ecf20Sopenharmony_ci	/*
10158c2ecf20Sopenharmony_ci	 * unmanaged (file) pages are all locked solidly,
10168c2ecf20Sopenharmony_ci	 * therefore it is impossible for `mapping' to be NULL.
10178c2ecf20Sopenharmony_ci	 */
10188c2ecf20Sopenharmony_ci	if (mapping && mapping != mc)
10198c2ecf20Sopenharmony_ci		/* ought to be unmanaged pages */
10208c2ecf20Sopenharmony_ci		goto out;
10218c2ecf20Sopenharmony_ci
10228c2ecf20Sopenharmony_ci	lock_page(page);
10238c2ecf20Sopenharmony_ci
10248c2ecf20Sopenharmony_ci	/* only true if page reclaim goes wrong, should never happen */
10258c2ecf20Sopenharmony_ci	DBG_BUGON(justfound && PagePrivate(page));
10268c2ecf20Sopenharmony_ci
10278c2ecf20Sopenharmony_ci	/* the page is still in manage cache */
10288c2ecf20Sopenharmony_ci	if (page->mapping == mc) {
10298c2ecf20Sopenharmony_ci		WRITE_ONCE(pcl->compressed_pages[nr], page);
10308c2ecf20Sopenharmony_ci
10318c2ecf20Sopenharmony_ci		ClearPageError(page);
10328c2ecf20Sopenharmony_ci		if (!PagePrivate(page)) {
10338c2ecf20Sopenharmony_ci			/*
10348c2ecf20Sopenharmony_ci			 * impossible to be !PagePrivate(page) for
10358c2ecf20Sopenharmony_ci			 * the current restriction as well if
10368c2ecf20Sopenharmony_ci			 * the page is already in compressed_pages[].
10378c2ecf20Sopenharmony_ci			 */
10388c2ecf20Sopenharmony_ci			DBG_BUGON(!justfound);
10398c2ecf20Sopenharmony_ci
10408c2ecf20Sopenharmony_ci			justfound = 0;
10418c2ecf20Sopenharmony_ci			set_page_private(page, (unsigned long)pcl);
10428c2ecf20Sopenharmony_ci			SetPagePrivate(page);
10438c2ecf20Sopenharmony_ci		}
10448c2ecf20Sopenharmony_ci
10458c2ecf20Sopenharmony_ci		/* no need to submit io if it is already up-to-date */
10468c2ecf20Sopenharmony_ci		if (PageUptodate(page)) {
10478c2ecf20Sopenharmony_ci			unlock_page(page);
10488c2ecf20Sopenharmony_ci			page = NULL;
10498c2ecf20Sopenharmony_ci		}
10508c2ecf20Sopenharmony_ci		goto out;
10518c2ecf20Sopenharmony_ci	}
10528c2ecf20Sopenharmony_ci
10538c2ecf20Sopenharmony_ci	/*
10548c2ecf20Sopenharmony_ci	 * the managed page has been truncated, it's unsafe to
10558c2ecf20Sopenharmony_ci	 * reuse this one, let's allocate a new cache-managed page.
10568c2ecf20Sopenharmony_ci	 */
10578c2ecf20Sopenharmony_ci	DBG_BUGON(page->mapping);
10588c2ecf20Sopenharmony_ci	DBG_BUGON(!justfound);
10598c2ecf20Sopenharmony_ci
10608c2ecf20Sopenharmony_ci	tocache = true;
10618c2ecf20Sopenharmony_ci	unlock_page(page);
10628c2ecf20Sopenharmony_ci	put_page(page);
10638c2ecf20Sopenharmony_ciout_allocpage:
10648c2ecf20Sopenharmony_ci	page = erofs_allocpage(pagepool, gfp | __GFP_NOFAIL);
10658c2ecf20Sopenharmony_ci	if (!tocache || add_to_page_cache_lru(page, mc, index + nr, gfp)) {
10668c2ecf20Sopenharmony_ci		/* non-LRU / non-movable temporary page is needed */
10678c2ecf20Sopenharmony_ci		page->mapping = Z_EROFS_MAPPING_STAGING;
10688c2ecf20Sopenharmony_ci		tocache = false;
10698c2ecf20Sopenharmony_ci	}
10708c2ecf20Sopenharmony_ci
10718c2ecf20Sopenharmony_ci	if (oldpage != cmpxchg(&pcl->compressed_pages[nr], oldpage, page)) {
10728c2ecf20Sopenharmony_ci		if (tocache) {
10738c2ecf20Sopenharmony_ci			/* since it added to managed cache successfully */
10748c2ecf20Sopenharmony_ci			unlock_page(page);
10758c2ecf20Sopenharmony_ci			put_page(page);
10768c2ecf20Sopenharmony_ci		} else {
10778c2ecf20Sopenharmony_ci			list_add(&page->lru, pagepool);
10788c2ecf20Sopenharmony_ci		}
10798c2ecf20Sopenharmony_ci		cond_resched();
10808c2ecf20Sopenharmony_ci		goto repeat;
10818c2ecf20Sopenharmony_ci	}
10828c2ecf20Sopenharmony_ci
10838c2ecf20Sopenharmony_ci	if (tocache) {
10848c2ecf20Sopenharmony_ci		set_page_private(page, (unsigned long)pcl);
10858c2ecf20Sopenharmony_ci		SetPagePrivate(page);
10868c2ecf20Sopenharmony_ci	}
10878c2ecf20Sopenharmony_ciout:	/* the only exit (for tracing and debugging) */
10888c2ecf20Sopenharmony_ci	return page;
10898c2ecf20Sopenharmony_ci}
10908c2ecf20Sopenharmony_ci
10918c2ecf20Sopenharmony_cistatic struct z_erofs_decompressqueue *
10928c2ecf20Sopenharmony_cijobqueue_init(struct super_block *sb,
10938c2ecf20Sopenharmony_ci	      struct z_erofs_decompressqueue *fgq, bool *fg)
10948c2ecf20Sopenharmony_ci{
10958c2ecf20Sopenharmony_ci	struct z_erofs_decompressqueue *q;
10968c2ecf20Sopenharmony_ci
10978c2ecf20Sopenharmony_ci	if (fg && !*fg) {
10988c2ecf20Sopenharmony_ci		q = kvzalloc(sizeof(*q), GFP_KERNEL | __GFP_NOWARN);
10998c2ecf20Sopenharmony_ci		if (!q) {
11008c2ecf20Sopenharmony_ci			*fg = true;
11018c2ecf20Sopenharmony_ci			goto fg_out;
11028c2ecf20Sopenharmony_ci		}
11038c2ecf20Sopenharmony_ci		INIT_WORK(&q->u.work, z_erofs_decompressqueue_work);
11048c2ecf20Sopenharmony_ci	} else {
11058c2ecf20Sopenharmony_cifg_out:
11068c2ecf20Sopenharmony_ci		q = fgq;
11078c2ecf20Sopenharmony_ci		init_waitqueue_head(&fgq->u.wait);
11088c2ecf20Sopenharmony_ci		atomic_set(&fgq->pending_bios, 0);
11098c2ecf20Sopenharmony_ci	}
11108c2ecf20Sopenharmony_ci	q->sb = sb;
11118c2ecf20Sopenharmony_ci	q->head = Z_EROFS_PCLUSTER_TAIL_CLOSED;
11128c2ecf20Sopenharmony_ci	return q;
11138c2ecf20Sopenharmony_ci}
11148c2ecf20Sopenharmony_ci
/* decompression jobqueue types, also used as indexes of per-queue arrays */
enum {
	/* pclusters which don't need to be read from the device */
	JQ_BYPASS = 0,
	/* pclusters which need io submission */
	JQ_SUBMIT,
	/* number of jobqueue types */
	NR_JOBQUEUES,
};
11218c2ecf20Sopenharmony_ci
11228c2ecf20Sopenharmony_cistatic void *jobqueueset_init(struct super_block *sb,
11238c2ecf20Sopenharmony_ci			      struct z_erofs_decompressqueue *q[],
11248c2ecf20Sopenharmony_ci			      struct z_erofs_decompressqueue *fgq, bool *fg)
11258c2ecf20Sopenharmony_ci{
11268c2ecf20Sopenharmony_ci	/*
11278c2ecf20Sopenharmony_ci	 * if managed cache is enabled, bypass jobqueue is needed,
11288c2ecf20Sopenharmony_ci	 * no need to read from device for all pclusters in this queue.
11298c2ecf20Sopenharmony_ci	 */
11308c2ecf20Sopenharmony_ci	q[JQ_BYPASS] = jobqueue_init(sb, fgq + JQ_BYPASS, NULL);
11318c2ecf20Sopenharmony_ci	q[JQ_SUBMIT] = jobqueue_init(sb, fgq + JQ_SUBMIT, fg);
11328c2ecf20Sopenharmony_ci
11338c2ecf20Sopenharmony_ci	return tagptr_cast_ptr(tagptr_fold(tagptr1_t, q[JQ_SUBMIT], *fg));
11348c2ecf20Sopenharmony_ci}
11358c2ecf20Sopenharmony_ci
11368c2ecf20Sopenharmony_cistatic void move_to_bypass_jobqueue(struct z_erofs_pcluster *pcl,
11378c2ecf20Sopenharmony_ci				    z_erofs_next_pcluster_t qtail[],
11388c2ecf20Sopenharmony_ci				    z_erofs_next_pcluster_t owned_head)
11398c2ecf20Sopenharmony_ci{
11408c2ecf20Sopenharmony_ci	z_erofs_next_pcluster_t *const submit_qtail = qtail[JQ_SUBMIT];
11418c2ecf20Sopenharmony_ci	z_erofs_next_pcluster_t *const bypass_qtail = qtail[JQ_BYPASS];
11428c2ecf20Sopenharmony_ci
11438c2ecf20Sopenharmony_ci	DBG_BUGON(owned_head == Z_EROFS_PCLUSTER_TAIL_CLOSED);
11448c2ecf20Sopenharmony_ci	if (owned_head == Z_EROFS_PCLUSTER_TAIL)
11458c2ecf20Sopenharmony_ci		owned_head = Z_EROFS_PCLUSTER_TAIL_CLOSED;
11468c2ecf20Sopenharmony_ci
11478c2ecf20Sopenharmony_ci	WRITE_ONCE(pcl->next, Z_EROFS_PCLUSTER_TAIL_CLOSED);
11488c2ecf20Sopenharmony_ci
11498c2ecf20Sopenharmony_ci	WRITE_ONCE(*submit_qtail, owned_head);
11508c2ecf20Sopenharmony_ci	WRITE_ONCE(*bypass_qtail, &pcl->next);
11518c2ecf20Sopenharmony_ci
11528c2ecf20Sopenharmony_ci	qtail[JQ_BYPASS] = &pcl->next;
11538c2ecf20Sopenharmony_ci}
11548c2ecf20Sopenharmony_ci
11558c2ecf20Sopenharmony_cistatic void z_erofs_submit_queue(struct super_block *sb,
11568c2ecf20Sopenharmony_ci				 struct z_erofs_decompress_frontend *f,
11578c2ecf20Sopenharmony_ci				 struct list_head *pagepool,
11588c2ecf20Sopenharmony_ci				 struct z_erofs_decompressqueue *fgq,
11598c2ecf20Sopenharmony_ci				 bool *force_fg)
11608c2ecf20Sopenharmony_ci{
11618c2ecf20Sopenharmony_ci	struct erofs_sb_info *const sbi = EROFS_SB(sb);
11628c2ecf20Sopenharmony_ci	z_erofs_next_pcluster_t qtail[NR_JOBQUEUES];
11638c2ecf20Sopenharmony_ci	struct z_erofs_decompressqueue *q[NR_JOBQUEUES];
11648c2ecf20Sopenharmony_ci	void *bi_private;
11658c2ecf20Sopenharmony_ci	z_erofs_next_pcluster_t owned_head = f->clt.owned_head;
11668c2ecf20Sopenharmony_ci	/* since bio will be NULL, no need to initialize last_index */
11678c2ecf20Sopenharmony_ci	pgoff_t last_index;
11688c2ecf20Sopenharmony_ci	unsigned int nr_bios = 0;
11698c2ecf20Sopenharmony_ci	struct bio *bio = NULL;
11708c2ecf20Sopenharmony_ci
11718c2ecf20Sopenharmony_ci	bi_private = jobqueueset_init(sb, q, fgq, force_fg);
11728c2ecf20Sopenharmony_ci	qtail[JQ_BYPASS] = &q[JQ_BYPASS]->head;
11738c2ecf20Sopenharmony_ci	qtail[JQ_SUBMIT] = &q[JQ_SUBMIT]->head;
11748c2ecf20Sopenharmony_ci
11758c2ecf20Sopenharmony_ci	/* by default, all need io submission */
11768c2ecf20Sopenharmony_ci	q[JQ_SUBMIT]->head = owned_head;
11778c2ecf20Sopenharmony_ci
11788c2ecf20Sopenharmony_ci	do {
11798c2ecf20Sopenharmony_ci		struct z_erofs_pcluster *pcl;
11808c2ecf20Sopenharmony_ci		pgoff_t cur, end;
11818c2ecf20Sopenharmony_ci		unsigned int i = 0;
11828c2ecf20Sopenharmony_ci		bool bypass = true;
11838c2ecf20Sopenharmony_ci
11848c2ecf20Sopenharmony_ci		/* no possible 'owned_head' equals the following */
11858c2ecf20Sopenharmony_ci		DBG_BUGON(owned_head == Z_EROFS_PCLUSTER_TAIL_CLOSED);
11868c2ecf20Sopenharmony_ci		DBG_BUGON(owned_head == Z_EROFS_PCLUSTER_NIL);
11878c2ecf20Sopenharmony_ci
11888c2ecf20Sopenharmony_ci		pcl = container_of(owned_head, struct z_erofs_pcluster, next);
11898c2ecf20Sopenharmony_ci
11908c2ecf20Sopenharmony_ci		cur = pcl->obj.index;
11918c2ecf20Sopenharmony_ci		end = cur + BIT(pcl->clusterbits);
11928c2ecf20Sopenharmony_ci
11938c2ecf20Sopenharmony_ci		/* close the main owned chain at first */
11948c2ecf20Sopenharmony_ci		owned_head = cmpxchg(&pcl->next, Z_EROFS_PCLUSTER_TAIL,
11958c2ecf20Sopenharmony_ci				     Z_EROFS_PCLUSTER_TAIL_CLOSED);
11968c2ecf20Sopenharmony_ci
11978c2ecf20Sopenharmony_ci		do {
11988c2ecf20Sopenharmony_ci			struct page *page;
11998c2ecf20Sopenharmony_ci
12008c2ecf20Sopenharmony_ci			page = pickup_page_for_submission(pcl, i++, pagepool,
12018c2ecf20Sopenharmony_ci							  MNGD_MAPPING(sbi),
12028c2ecf20Sopenharmony_ci							  GFP_NOFS);
12038c2ecf20Sopenharmony_ci			if (!page)
12048c2ecf20Sopenharmony_ci				continue;
12058c2ecf20Sopenharmony_ci
12068c2ecf20Sopenharmony_ci			if (bio && cur != last_index + 1) {
12078c2ecf20Sopenharmony_cisubmit_bio_retry:
12088c2ecf20Sopenharmony_ci				submit_bio(bio);
12098c2ecf20Sopenharmony_ci				bio = NULL;
12108c2ecf20Sopenharmony_ci			}
12118c2ecf20Sopenharmony_ci
12128c2ecf20Sopenharmony_ci			if (!bio) {
12138c2ecf20Sopenharmony_ci				bio = bio_alloc(GFP_NOIO, BIO_MAX_PAGES);
12148c2ecf20Sopenharmony_ci
12158c2ecf20Sopenharmony_ci				bio->bi_end_io = z_erofs_decompressqueue_endio;
12168c2ecf20Sopenharmony_ci				bio_set_dev(bio, sb->s_bdev);
12178c2ecf20Sopenharmony_ci				bio->bi_iter.bi_sector = (sector_t)cur <<
12188c2ecf20Sopenharmony_ci					LOG_SECTORS_PER_BLOCK;
12198c2ecf20Sopenharmony_ci				bio->bi_private = bi_private;
12208c2ecf20Sopenharmony_ci				bio->bi_opf = REQ_OP_READ;
12218c2ecf20Sopenharmony_ci				if (f->readahead)
12228c2ecf20Sopenharmony_ci					bio->bi_opf |= REQ_RAHEAD;
12238c2ecf20Sopenharmony_ci				++nr_bios;
12248c2ecf20Sopenharmony_ci			}
12258c2ecf20Sopenharmony_ci
12268c2ecf20Sopenharmony_ci			if (bio_add_page(bio, page, PAGE_SIZE, 0) < PAGE_SIZE)
12278c2ecf20Sopenharmony_ci				goto submit_bio_retry;
12288c2ecf20Sopenharmony_ci
12298c2ecf20Sopenharmony_ci			last_index = cur;
12308c2ecf20Sopenharmony_ci			bypass = false;
12318c2ecf20Sopenharmony_ci		} while (++cur < end);
12328c2ecf20Sopenharmony_ci
12338c2ecf20Sopenharmony_ci		if (!bypass)
12348c2ecf20Sopenharmony_ci			qtail[JQ_SUBMIT] = &pcl->next;
12358c2ecf20Sopenharmony_ci		else
12368c2ecf20Sopenharmony_ci			move_to_bypass_jobqueue(pcl, qtail, owned_head);
12378c2ecf20Sopenharmony_ci	} while (owned_head != Z_EROFS_PCLUSTER_TAIL);
12388c2ecf20Sopenharmony_ci
12398c2ecf20Sopenharmony_ci	if (bio)
12408c2ecf20Sopenharmony_ci		submit_bio(bio);
12418c2ecf20Sopenharmony_ci
12428c2ecf20Sopenharmony_ci	/*
12438c2ecf20Sopenharmony_ci	 * although background is preferred, no one is pending for submission.
12448c2ecf20Sopenharmony_ci	 * don't issue workqueue for decompression but drop it directly instead.
12458c2ecf20Sopenharmony_ci	 */
12468c2ecf20Sopenharmony_ci	if (!*force_fg && !nr_bios) {
12478c2ecf20Sopenharmony_ci		kvfree(q[JQ_SUBMIT]);
12488c2ecf20Sopenharmony_ci		return;
12498c2ecf20Sopenharmony_ci	}
12508c2ecf20Sopenharmony_ci	z_erofs_decompress_kickoff(q[JQ_SUBMIT], *force_fg, nr_bios);
12518c2ecf20Sopenharmony_ci}
12528c2ecf20Sopenharmony_ci
12538c2ecf20Sopenharmony_cistatic void z_erofs_runqueue(struct super_block *sb,
12548c2ecf20Sopenharmony_ci			     struct z_erofs_decompress_frontend *f,
12558c2ecf20Sopenharmony_ci			     struct list_head *pagepool, bool force_fg)
12568c2ecf20Sopenharmony_ci{
12578c2ecf20Sopenharmony_ci	struct z_erofs_decompressqueue io[NR_JOBQUEUES];
12588c2ecf20Sopenharmony_ci
12598c2ecf20Sopenharmony_ci	if (f->clt.owned_head == Z_EROFS_PCLUSTER_TAIL)
12608c2ecf20Sopenharmony_ci		return;
12618c2ecf20Sopenharmony_ci	z_erofs_submit_queue(sb, f, pagepool, io, &force_fg);
12628c2ecf20Sopenharmony_ci
12638c2ecf20Sopenharmony_ci	/* handle bypass queue (no i/o pclusters) immediately */
12648c2ecf20Sopenharmony_ci	z_erofs_decompress_queue(&io[JQ_BYPASS], pagepool);
12658c2ecf20Sopenharmony_ci
12668c2ecf20Sopenharmony_ci	if (!force_fg)
12678c2ecf20Sopenharmony_ci		return;
12688c2ecf20Sopenharmony_ci
12698c2ecf20Sopenharmony_ci	/* wait until all bios are completed */
12708c2ecf20Sopenharmony_ci	io_wait_event(io[JQ_SUBMIT].u.wait,
12718c2ecf20Sopenharmony_ci		      !atomic_read(&io[JQ_SUBMIT].pending_bios));
12728c2ecf20Sopenharmony_ci
12738c2ecf20Sopenharmony_ci	/* handle synchronous decompress queue in the caller context */
12748c2ecf20Sopenharmony_ci	z_erofs_decompress_queue(&io[JQ_SUBMIT], pagepool);
12758c2ecf20Sopenharmony_ci}
12768c2ecf20Sopenharmony_ci
12778c2ecf20Sopenharmony_cistatic int z_erofs_readpage(struct file *file, struct page *page)
12788c2ecf20Sopenharmony_ci{
12798c2ecf20Sopenharmony_ci	struct inode *const inode = page->mapping->host;
12808c2ecf20Sopenharmony_ci	struct z_erofs_decompress_frontend f = DECOMPRESS_FRONTEND_INIT(inode);
12818c2ecf20Sopenharmony_ci	int err;
12828c2ecf20Sopenharmony_ci	LIST_HEAD(pagepool);
12838c2ecf20Sopenharmony_ci
12848c2ecf20Sopenharmony_ci	trace_erofs_readpage(page, false);
12858c2ecf20Sopenharmony_ci
12868c2ecf20Sopenharmony_ci	f.headoffset = (erofs_off_t)page->index << PAGE_SHIFT;
12878c2ecf20Sopenharmony_ci
12888c2ecf20Sopenharmony_ci	err = z_erofs_do_read_page(&f, page);
12898c2ecf20Sopenharmony_ci	(void)z_erofs_collector_end(&f.clt);
12908c2ecf20Sopenharmony_ci
12918c2ecf20Sopenharmony_ci	/* if some compressed cluster ready, need submit them anyway */
12928c2ecf20Sopenharmony_ci	z_erofs_runqueue(inode->i_sb, &f, &pagepool, true);
12938c2ecf20Sopenharmony_ci
12948c2ecf20Sopenharmony_ci	if (err)
12958c2ecf20Sopenharmony_ci		erofs_err(inode->i_sb, "failed to read, err [%d]", err);
12968c2ecf20Sopenharmony_ci
12978c2ecf20Sopenharmony_ci	if (f.map.mpage)
12988c2ecf20Sopenharmony_ci		put_page(f.map.mpage);
12998c2ecf20Sopenharmony_ci
13008c2ecf20Sopenharmony_ci	/* clean up the remaining free pages */
13018c2ecf20Sopenharmony_ci	put_pages_list(&pagepool);
13028c2ecf20Sopenharmony_ci	return err;
13038c2ecf20Sopenharmony_ci}
13048c2ecf20Sopenharmony_ci
13058c2ecf20Sopenharmony_cistatic void z_erofs_readahead(struct readahead_control *rac)
13068c2ecf20Sopenharmony_ci{
13078c2ecf20Sopenharmony_ci	struct inode *const inode = rac->mapping->host;
13088c2ecf20Sopenharmony_ci	struct erofs_sb_info *const sbi = EROFS_I_SB(inode);
13098c2ecf20Sopenharmony_ci
13108c2ecf20Sopenharmony_ci	unsigned int nr_pages = readahead_count(rac);
13118c2ecf20Sopenharmony_ci	bool sync = (nr_pages <= sbi->ctx.max_sync_decompress_pages);
13128c2ecf20Sopenharmony_ci	struct z_erofs_decompress_frontend f = DECOMPRESS_FRONTEND_INIT(inode);
13138c2ecf20Sopenharmony_ci	struct page *page, *head = NULL;
13148c2ecf20Sopenharmony_ci	LIST_HEAD(pagepool);
13158c2ecf20Sopenharmony_ci
13168c2ecf20Sopenharmony_ci	trace_erofs_readpages(inode, readahead_index(rac), nr_pages, false);
13178c2ecf20Sopenharmony_ci
13188c2ecf20Sopenharmony_ci	f.readahead = true;
13198c2ecf20Sopenharmony_ci	f.headoffset = readahead_pos(rac);
13208c2ecf20Sopenharmony_ci
13218c2ecf20Sopenharmony_ci	while ((page = readahead_page(rac))) {
13228c2ecf20Sopenharmony_ci		prefetchw(&page->flags);
13238c2ecf20Sopenharmony_ci
13248c2ecf20Sopenharmony_ci		/*
13258c2ecf20Sopenharmony_ci		 * A pure asynchronous readahead is indicated if
13268c2ecf20Sopenharmony_ci		 * a PG_readahead marked page is hitted at first.
13278c2ecf20Sopenharmony_ci		 * Let's also do asynchronous decompression for this case.
13288c2ecf20Sopenharmony_ci		 */
13298c2ecf20Sopenharmony_ci		sync &= !(PageReadahead(page) && !head);
13308c2ecf20Sopenharmony_ci
13318c2ecf20Sopenharmony_ci		set_page_private(page, (unsigned long)head);
13328c2ecf20Sopenharmony_ci		head = page;
13338c2ecf20Sopenharmony_ci	}
13348c2ecf20Sopenharmony_ci
13358c2ecf20Sopenharmony_ci	while (head) {
13368c2ecf20Sopenharmony_ci		struct page *page = head;
13378c2ecf20Sopenharmony_ci		int err;
13388c2ecf20Sopenharmony_ci
13398c2ecf20Sopenharmony_ci		/* traversal in reverse order */
13408c2ecf20Sopenharmony_ci		head = (void *)page_private(page);
13418c2ecf20Sopenharmony_ci
13428c2ecf20Sopenharmony_ci		err = z_erofs_do_read_page(&f, page);
13438c2ecf20Sopenharmony_ci		if (err)
13448c2ecf20Sopenharmony_ci			erofs_err(inode->i_sb,
13458c2ecf20Sopenharmony_ci				  "readahead error at page %lu @ nid %llu",
13468c2ecf20Sopenharmony_ci				  page->index, EROFS_I(inode)->nid);
13478c2ecf20Sopenharmony_ci		put_page(page);
13488c2ecf20Sopenharmony_ci	}
13498c2ecf20Sopenharmony_ci
13508c2ecf20Sopenharmony_ci	(void)z_erofs_collector_end(&f.clt);
13518c2ecf20Sopenharmony_ci
13528c2ecf20Sopenharmony_ci	z_erofs_runqueue(inode->i_sb, &f, &pagepool, sync);
13538c2ecf20Sopenharmony_ci
13548c2ecf20Sopenharmony_ci	if (f.map.mpage)
13558c2ecf20Sopenharmony_ci		put_page(f.map.mpage);
13568c2ecf20Sopenharmony_ci
13578c2ecf20Sopenharmony_ci	/* clean up the remaining free pages */
13588c2ecf20Sopenharmony_ci	put_pages_list(&pagepool);
13598c2ecf20Sopenharmony_ci}
13608c2ecf20Sopenharmony_ci
13618c2ecf20Sopenharmony_ciconst struct address_space_operations z_erofs_aops = {
13628c2ecf20Sopenharmony_ci	.readpage = z_erofs_readpage,
13638c2ecf20Sopenharmony_ci	.readahead = z_erofs_readahead,
13648c2ecf20Sopenharmony_ci};
13658c2ecf20Sopenharmony_ci
1366