162306a36Sopenharmony_ci// SPDX-License-Identifier: GPL-2.0-only 262306a36Sopenharmony_ci/* 362306a36Sopenharmony_ci * Copyright (C) 2018 HUAWEI, Inc. 462306a36Sopenharmony_ci * https://www.huawei.com/ 562306a36Sopenharmony_ci * Copyright (C) 2022 Alibaba Cloud 662306a36Sopenharmony_ci */ 762306a36Sopenharmony_ci#include "compress.h" 862306a36Sopenharmony_ci#include <linux/psi.h> 962306a36Sopenharmony_ci#include <linux/cpuhotplug.h> 1062306a36Sopenharmony_ci#include <trace/events/erofs.h> 1162306a36Sopenharmony_ci 1262306a36Sopenharmony_ci#define Z_EROFS_PCLUSTER_MAX_PAGES (Z_EROFS_PCLUSTER_MAX_SIZE / PAGE_SIZE) 1362306a36Sopenharmony_ci#define Z_EROFS_INLINE_BVECS 2 1462306a36Sopenharmony_ci 1562306a36Sopenharmony_ci/* 1662306a36Sopenharmony_ci * let's leave a type here in case of introducing 1762306a36Sopenharmony_ci * another tagged pointer later. 1862306a36Sopenharmony_ci */ 1962306a36Sopenharmony_citypedef void *z_erofs_next_pcluster_t; 2062306a36Sopenharmony_ci 2162306a36Sopenharmony_cistruct z_erofs_bvec { 2262306a36Sopenharmony_ci struct page *page; 2362306a36Sopenharmony_ci int offset; 2462306a36Sopenharmony_ci unsigned int end; 2562306a36Sopenharmony_ci}; 2662306a36Sopenharmony_ci 2762306a36Sopenharmony_ci#define __Z_EROFS_BVSET(name, total) \ 2862306a36Sopenharmony_cistruct name { \ 2962306a36Sopenharmony_ci /* point to the next page which contains the following bvecs */ \ 3062306a36Sopenharmony_ci struct page *nextpage; \ 3162306a36Sopenharmony_ci struct z_erofs_bvec bvec[total]; \ 3262306a36Sopenharmony_ci} 3362306a36Sopenharmony_ci__Z_EROFS_BVSET(z_erofs_bvset,); 3462306a36Sopenharmony_ci__Z_EROFS_BVSET(z_erofs_bvset_inline, Z_EROFS_INLINE_BVECS); 3562306a36Sopenharmony_ci 3662306a36Sopenharmony_ci/* 3762306a36Sopenharmony_ci * Structure fields follow one of the following exclusion rules. 3862306a36Sopenharmony_ci * 3962306a36Sopenharmony_ci * I: Modifiable by initialization/destruction paths and read-only 4062306a36Sopenharmony_ci * for everyone else; 4162306a36Sopenharmony_ci * 4262306a36Sopenharmony_ci * L: Field should be protected by the pcluster lock; 4362306a36Sopenharmony_ci * 4462306a36Sopenharmony_ci * A: Field should be accessed / updated in atomic for parallelized code. 4562306a36Sopenharmony_ci */ 4662306a36Sopenharmony_cistruct z_erofs_pcluster { 4762306a36Sopenharmony_ci struct erofs_workgroup obj; 4862306a36Sopenharmony_ci struct mutex lock; 4962306a36Sopenharmony_ci 5062306a36Sopenharmony_ci /* A: point to next chained pcluster or TAILs */ 5162306a36Sopenharmony_ci z_erofs_next_pcluster_t next; 5262306a36Sopenharmony_ci 5362306a36Sopenharmony_ci /* L: the maximum decompression size of this round */ 5462306a36Sopenharmony_ci unsigned int length; 5562306a36Sopenharmony_ci 5662306a36Sopenharmony_ci /* L: total number of bvecs */ 5762306a36Sopenharmony_ci unsigned int vcnt; 5862306a36Sopenharmony_ci 5962306a36Sopenharmony_ci /* I: page offset of start position of decompression */ 6062306a36Sopenharmony_ci unsigned short pageofs_out; 6162306a36Sopenharmony_ci 6262306a36Sopenharmony_ci /* I: page offset of inline compressed data */ 6362306a36Sopenharmony_ci unsigned short pageofs_in; 6462306a36Sopenharmony_ci 6562306a36Sopenharmony_ci union { 6662306a36Sopenharmony_ci /* L: inline a certain number of bvec for bootstrap */ 6762306a36Sopenharmony_ci struct z_erofs_bvset_inline bvset; 6862306a36Sopenharmony_ci 6962306a36Sopenharmony_ci /* I: can be used to free the pcluster by RCU. */ 7062306a36Sopenharmony_ci struct rcu_head rcu; 7162306a36Sopenharmony_ci }; 7262306a36Sopenharmony_ci 7362306a36Sopenharmony_ci union { 7462306a36Sopenharmony_ci /* I: physical cluster size in pages */ 7562306a36Sopenharmony_ci unsigned short pclusterpages; 7662306a36Sopenharmony_ci 7762306a36Sopenharmony_ci /* I: tailpacking inline compressed size */ 7862306a36Sopenharmony_ci unsigned short tailpacking_size; 7962306a36Sopenharmony_ci }; 8062306a36Sopenharmony_ci 8162306a36Sopenharmony_ci /* I: compression algorithm format */ 8262306a36Sopenharmony_ci unsigned char algorithmformat; 8362306a36Sopenharmony_ci 8462306a36Sopenharmony_ci /* L: whether partial decompression or not */ 8562306a36Sopenharmony_ci bool partial; 8662306a36Sopenharmony_ci 8762306a36Sopenharmony_ci /* L: indicate several pageofs_outs or not */ 8862306a36Sopenharmony_ci bool multibases; 8962306a36Sopenharmony_ci 9062306a36Sopenharmony_ci /* A: compressed bvecs (can be cached or inplaced pages) */ 9162306a36Sopenharmony_ci struct z_erofs_bvec compressed_bvecs[]; 9262306a36Sopenharmony_ci}; 9362306a36Sopenharmony_ci 9462306a36Sopenharmony_ci/* the end of a chain of pclusters */ 9562306a36Sopenharmony_ci#define Z_EROFS_PCLUSTER_TAIL ((void *) 0x700 + POISON_POINTER_DELTA) 9662306a36Sopenharmony_ci#define Z_EROFS_PCLUSTER_NIL (NULL) 9762306a36Sopenharmony_ci 9862306a36Sopenharmony_cistruct z_erofs_decompressqueue { 9962306a36Sopenharmony_ci struct super_block *sb; 10062306a36Sopenharmony_ci atomic_t pending_bios; 10162306a36Sopenharmony_ci z_erofs_next_pcluster_t head; 10262306a36Sopenharmony_ci 10362306a36Sopenharmony_ci union { 10462306a36Sopenharmony_ci struct completion done; 10562306a36Sopenharmony_ci struct work_struct work; 10662306a36Sopenharmony_ci struct kthread_work kthread_work; 10762306a36Sopenharmony_ci } u; 10862306a36Sopenharmony_ci bool eio, sync; 10962306a36Sopenharmony_ci}; 11062306a36Sopenharmony_ci 11162306a36Sopenharmony_cistatic inline bool z_erofs_is_inline_pcluster(struct z_erofs_pcluster *pcl) 11262306a36Sopenharmony_ci{ 11362306a36Sopenharmony_ci return !pcl->obj.index; 11462306a36Sopenharmony_ci} 11562306a36Sopenharmony_ci 11662306a36Sopenharmony_cistatic inline unsigned int z_erofs_pclusterpages(struct z_erofs_pcluster *pcl) 11762306a36Sopenharmony_ci{ 11862306a36Sopenharmony_ci if (z_erofs_is_inline_pcluster(pcl)) 11962306a36Sopenharmony_ci return 1; 12062306a36Sopenharmony_ci return pcl->pclusterpages; 12162306a36Sopenharmony_ci} 12262306a36Sopenharmony_ci 12362306a36Sopenharmony_ci/* 12462306a36Sopenharmony_ci * bit 30: I/O error occurred on this page 12562306a36Sopenharmony_ci * bit 0 - 29: remaining parts to complete this page 12662306a36Sopenharmony_ci */ 12762306a36Sopenharmony_ci#define Z_EROFS_PAGE_EIO (1 << 30) 12862306a36Sopenharmony_ci 12962306a36Sopenharmony_cistatic inline void z_erofs_onlinepage_init(struct page *page) 13062306a36Sopenharmony_ci{ 13162306a36Sopenharmony_ci union { 13262306a36Sopenharmony_ci atomic_t o; 13362306a36Sopenharmony_ci unsigned long v; 13462306a36Sopenharmony_ci } u = { .o = ATOMIC_INIT(1) }; 13562306a36Sopenharmony_ci 13662306a36Sopenharmony_ci set_page_private(page, u.v); 13762306a36Sopenharmony_ci smp_wmb(); 13862306a36Sopenharmony_ci SetPagePrivate(page); 13962306a36Sopenharmony_ci} 14062306a36Sopenharmony_ci 14162306a36Sopenharmony_cistatic inline void z_erofs_onlinepage_split(struct page *page) 14262306a36Sopenharmony_ci{ 14362306a36Sopenharmony_ci atomic_inc((atomic_t *)&page->private); 14462306a36Sopenharmony_ci} 14562306a36Sopenharmony_ci 14662306a36Sopenharmony_cistatic void z_erofs_onlinepage_endio(struct page *page, int err) 14762306a36Sopenharmony_ci{ 14862306a36Sopenharmony_ci int orig, v; 14962306a36Sopenharmony_ci 15062306a36Sopenharmony_ci DBG_BUGON(!PagePrivate(page)); 15162306a36Sopenharmony_ci 15262306a36Sopenharmony_ci do { 15362306a36Sopenharmony_ci orig = atomic_read((atomic_t *)&page->private); 15462306a36Sopenharmony_ci v = (orig - 1) | (err ? Z_EROFS_PAGE_EIO : 0); 15562306a36Sopenharmony_ci } while (atomic_cmpxchg((atomic_t *)&page->private, orig, v) != orig); 15662306a36Sopenharmony_ci 15762306a36Sopenharmony_ci if (!(v & ~Z_EROFS_PAGE_EIO)) { 15862306a36Sopenharmony_ci set_page_private(page, 0); 15962306a36Sopenharmony_ci ClearPagePrivate(page); 16062306a36Sopenharmony_ci if (!(v & Z_EROFS_PAGE_EIO)) 16162306a36Sopenharmony_ci SetPageUptodate(page); 16262306a36Sopenharmony_ci unlock_page(page); 16362306a36Sopenharmony_ci } 16462306a36Sopenharmony_ci} 16562306a36Sopenharmony_ci 16662306a36Sopenharmony_ci#define Z_EROFS_ONSTACK_PAGES 32 16762306a36Sopenharmony_ci 16862306a36Sopenharmony_ci/* 16962306a36Sopenharmony_ci * since pclustersize is variable for big pcluster feature, introduce slab 17062306a36Sopenharmony_ci * pools implementation for different pcluster sizes. 17162306a36Sopenharmony_ci */ 17262306a36Sopenharmony_cistruct z_erofs_pcluster_slab { 17362306a36Sopenharmony_ci struct kmem_cache *slab; 17462306a36Sopenharmony_ci unsigned int maxpages; 17562306a36Sopenharmony_ci char name[48]; 17662306a36Sopenharmony_ci}; 17762306a36Sopenharmony_ci 17862306a36Sopenharmony_ci#define _PCLP(n) { .maxpages = n } 17962306a36Sopenharmony_ci 18062306a36Sopenharmony_cistatic struct z_erofs_pcluster_slab pcluster_pool[] __read_mostly = { 18162306a36Sopenharmony_ci _PCLP(1), _PCLP(4), _PCLP(16), _PCLP(64), _PCLP(128), 18262306a36Sopenharmony_ci _PCLP(Z_EROFS_PCLUSTER_MAX_PAGES) 18362306a36Sopenharmony_ci}; 18462306a36Sopenharmony_ci 18562306a36Sopenharmony_cistruct z_erofs_bvec_iter { 18662306a36Sopenharmony_ci struct page *bvpage; 18762306a36Sopenharmony_ci struct z_erofs_bvset *bvset; 18862306a36Sopenharmony_ci unsigned int nr, cur; 18962306a36Sopenharmony_ci}; 19062306a36Sopenharmony_ci 19162306a36Sopenharmony_cistatic struct page *z_erofs_bvec_iter_end(struct z_erofs_bvec_iter *iter) 19262306a36Sopenharmony_ci{ 19362306a36Sopenharmony_ci if (iter->bvpage) 19462306a36Sopenharmony_ci kunmap_local(iter->bvset); 19562306a36Sopenharmony_ci return iter->bvpage; 19662306a36Sopenharmony_ci} 19762306a36Sopenharmony_ci 19862306a36Sopenharmony_cistatic struct page *z_erofs_bvset_flip(struct z_erofs_bvec_iter *iter) 19962306a36Sopenharmony_ci{ 20062306a36Sopenharmony_ci unsigned long base = (unsigned long)((struct z_erofs_bvset *)0)->bvec; 20162306a36Sopenharmony_ci /* have to access nextpage in advance, otherwise it will be unmapped */ 20262306a36Sopenharmony_ci struct page *nextpage = iter->bvset->nextpage; 20362306a36Sopenharmony_ci struct page *oldpage; 20462306a36Sopenharmony_ci 20562306a36Sopenharmony_ci DBG_BUGON(!nextpage); 20662306a36Sopenharmony_ci oldpage = z_erofs_bvec_iter_end(iter); 20762306a36Sopenharmony_ci iter->bvpage = nextpage; 20862306a36Sopenharmony_ci iter->bvset = kmap_local_page(nextpage); 20962306a36Sopenharmony_ci iter->nr = (PAGE_SIZE - base) / sizeof(struct z_erofs_bvec); 21062306a36Sopenharmony_ci iter->cur = 0; 21162306a36Sopenharmony_ci return oldpage; 21262306a36Sopenharmony_ci} 21362306a36Sopenharmony_ci 21462306a36Sopenharmony_cistatic void z_erofs_bvec_iter_begin(struct z_erofs_bvec_iter *iter, 21562306a36Sopenharmony_ci struct z_erofs_bvset_inline *bvset, 21662306a36Sopenharmony_ci unsigned int bootstrap_nr, 21762306a36Sopenharmony_ci unsigned int cur) 21862306a36Sopenharmony_ci{ 21962306a36Sopenharmony_ci *iter = (struct z_erofs_bvec_iter) { 22062306a36Sopenharmony_ci .nr = bootstrap_nr, 22162306a36Sopenharmony_ci .bvset = (struct z_erofs_bvset *)bvset, 22262306a36Sopenharmony_ci }; 22362306a36Sopenharmony_ci 22462306a36Sopenharmony_ci while (cur > iter->nr) { 22562306a36Sopenharmony_ci cur -= iter->nr; 22662306a36Sopenharmony_ci z_erofs_bvset_flip(iter); 22762306a36Sopenharmony_ci } 22862306a36Sopenharmony_ci iter->cur = cur; 22962306a36Sopenharmony_ci} 23062306a36Sopenharmony_ci 23162306a36Sopenharmony_cistatic int z_erofs_bvec_enqueue(struct z_erofs_bvec_iter *iter, 23262306a36Sopenharmony_ci struct z_erofs_bvec *bvec, 23362306a36Sopenharmony_ci struct page **candidate_bvpage, 23462306a36Sopenharmony_ci struct page **pagepool) 23562306a36Sopenharmony_ci{ 23662306a36Sopenharmony_ci if (iter->cur >= iter->nr) { 23762306a36Sopenharmony_ci struct page *nextpage = *candidate_bvpage; 23862306a36Sopenharmony_ci 23962306a36Sopenharmony_ci if (!nextpage) { 24062306a36Sopenharmony_ci nextpage = erofs_allocpage(pagepool, GFP_NOFS); 24162306a36Sopenharmony_ci if (!nextpage) 24262306a36Sopenharmony_ci return -ENOMEM; 24362306a36Sopenharmony_ci set_page_private(nextpage, Z_EROFS_SHORTLIVED_PAGE); 24462306a36Sopenharmony_ci } 24562306a36Sopenharmony_ci DBG_BUGON(iter->bvset->nextpage); 24662306a36Sopenharmony_ci iter->bvset->nextpage = nextpage; 24762306a36Sopenharmony_ci z_erofs_bvset_flip(iter); 24862306a36Sopenharmony_ci 24962306a36Sopenharmony_ci iter->bvset->nextpage = NULL; 25062306a36Sopenharmony_ci *candidate_bvpage = NULL; 25162306a36Sopenharmony_ci } 25262306a36Sopenharmony_ci iter->bvset->bvec[iter->cur++] = *bvec; 25362306a36Sopenharmony_ci return 0; 25462306a36Sopenharmony_ci} 25562306a36Sopenharmony_ci 25662306a36Sopenharmony_cistatic void z_erofs_bvec_dequeue(struct z_erofs_bvec_iter *iter, 25762306a36Sopenharmony_ci struct z_erofs_bvec *bvec, 25862306a36Sopenharmony_ci struct page **old_bvpage) 25962306a36Sopenharmony_ci{ 26062306a36Sopenharmony_ci if (iter->cur == iter->nr) 26162306a36Sopenharmony_ci *old_bvpage = z_erofs_bvset_flip(iter); 26262306a36Sopenharmony_ci else 26362306a36Sopenharmony_ci *old_bvpage = NULL; 26462306a36Sopenharmony_ci *bvec = iter->bvset->bvec[iter->cur++]; 26562306a36Sopenharmony_ci} 26662306a36Sopenharmony_ci 26762306a36Sopenharmony_cistatic void z_erofs_destroy_pcluster_pool(void) 26862306a36Sopenharmony_ci{ 26962306a36Sopenharmony_ci int i; 27062306a36Sopenharmony_ci 27162306a36Sopenharmony_ci for (i = 0; i < ARRAY_SIZE(pcluster_pool); ++i) { 27262306a36Sopenharmony_ci if (!pcluster_pool[i].slab) 27362306a36Sopenharmony_ci continue; 27462306a36Sopenharmony_ci kmem_cache_destroy(pcluster_pool[i].slab); 27562306a36Sopenharmony_ci pcluster_pool[i].slab = NULL; 27662306a36Sopenharmony_ci } 27762306a36Sopenharmony_ci} 27862306a36Sopenharmony_ci 27962306a36Sopenharmony_cistatic int z_erofs_create_pcluster_pool(void) 28062306a36Sopenharmony_ci{ 28162306a36Sopenharmony_ci struct z_erofs_pcluster_slab *pcs; 28262306a36Sopenharmony_ci struct z_erofs_pcluster *a; 28362306a36Sopenharmony_ci unsigned int size; 28462306a36Sopenharmony_ci 28562306a36Sopenharmony_ci for (pcs = pcluster_pool; 28662306a36Sopenharmony_ci pcs < pcluster_pool + ARRAY_SIZE(pcluster_pool); ++pcs) { 28762306a36Sopenharmony_ci size = struct_size(a, compressed_bvecs, pcs->maxpages); 28862306a36Sopenharmony_ci 28962306a36Sopenharmony_ci sprintf(pcs->name, "erofs_pcluster-%u", pcs->maxpages); 29062306a36Sopenharmony_ci pcs->slab = kmem_cache_create(pcs->name, size, 0, 29162306a36Sopenharmony_ci SLAB_RECLAIM_ACCOUNT, NULL); 29262306a36Sopenharmony_ci if (pcs->slab) 29362306a36Sopenharmony_ci continue; 29462306a36Sopenharmony_ci 29562306a36Sopenharmony_ci z_erofs_destroy_pcluster_pool(); 29662306a36Sopenharmony_ci return -ENOMEM; 29762306a36Sopenharmony_ci } 29862306a36Sopenharmony_ci return 0; 29962306a36Sopenharmony_ci} 30062306a36Sopenharmony_ci 30162306a36Sopenharmony_cistatic struct z_erofs_pcluster *z_erofs_alloc_pcluster(unsigned int nrpages) 30262306a36Sopenharmony_ci{ 30362306a36Sopenharmony_ci int i; 30462306a36Sopenharmony_ci 30562306a36Sopenharmony_ci for (i = 0; i < ARRAY_SIZE(pcluster_pool); ++i) { 30662306a36Sopenharmony_ci struct z_erofs_pcluster_slab *pcs = pcluster_pool + i; 30762306a36Sopenharmony_ci struct z_erofs_pcluster *pcl; 30862306a36Sopenharmony_ci 30962306a36Sopenharmony_ci if (nrpages > pcs->maxpages) 31062306a36Sopenharmony_ci continue; 31162306a36Sopenharmony_ci 31262306a36Sopenharmony_ci pcl = kmem_cache_zalloc(pcs->slab, GFP_NOFS); 31362306a36Sopenharmony_ci if (!pcl) 31462306a36Sopenharmony_ci return ERR_PTR(-ENOMEM); 31562306a36Sopenharmony_ci pcl->pclusterpages = nrpages; 31662306a36Sopenharmony_ci return pcl; 31762306a36Sopenharmony_ci } 31862306a36Sopenharmony_ci return ERR_PTR(-EINVAL); 31962306a36Sopenharmony_ci} 32062306a36Sopenharmony_ci 32162306a36Sopenharmony_cistatic void z_erofs_free_pcluster(struct z_erofs_pcluster *pcl) 32262306a36Sopenharmony_ci{ 32362306a36Sopenharmony_ci unsigned int pclusterpages = z_erofs_pclusterpages(pcl); 32462306a36Sopenharmony_ci int i; 32562306a36Sopenharmony_ci 32662306a36Sopenharmony_ci for (i = 0; i < ARRAY_SIZE(pcluster_pool); ++i) { 32762306a36Sopenharmony_ci struct z_erofs_pcluster_slab *pcs = pcluster_pool + i; 32862306a36Sopenharmony_ci 32962306a36Sopenharmony_ci if (pclusterpages > pcs->maxpages) 33062306a36Sopenharmony_ci continue; 33162306a36Sopenharmony_ci 33262306a36Sopenharmony_ci kmem_cache_free(pcs->slab, pcl); 33362306a36Sopenharmony_ci return; 33462306a36Sopenharmony_ci } 33562306a36Sopenharmony_ci DBG_BUGON(1); 33662306a36Sopenharmony_ci} 33762306a36Sopenharmony_ci 33862306a36Sopenharmony_cistatic struct workqueue_struct *z_erofs_workqueue __read_mostly; 33962306a36Sopenharmony_ci 34062306a36Sopenharmony_ci#ifdef CONFIG_EROFS_FS_PCPU_KTHREAD 34162306a36Sopenharmony_cistatic struct kthread_worker __rcu **z_erofs_pcpu_workers; 34262306a36Sopenharmony_ci 34362306a36Sopenharmony_cistatic void erofs_destroy_percpu_workers(void) 34462306a36Sopenharmony_ci{ 34562306a36Sopenharmony_ci struct kthread_worker *worker; 34662306a36Sopenharmony_ci unsigned int cpu; 34762306a36Sopenharmony_ci 34862306a36Sopenharmony_ci for_each_possible_cpu(cpu) { 34962306a36Sopenharmony_ci worker = rcu_dereference_protected( 35062306a36Sopenharmony_ci z_erofs_pcpu_workers[cpu], 1); 35162306a36Sopenharmony_ci rcu_assign_pointer(z_erofs_pcpu_workers[cpu], NULL); 35262306a36Sopenharmony_ci if (worker) 35362306a36Sopenharmony_ci kthread_destroy_worker(worker); 35462306a36Sopenharmony_ci } 35562306a36Sopenharmony_ci kfree(z_erofs_pcpu_workers); 35662306a36Sopenharmony_ci} 35762306a36Sopenharmony_ci 35862306a36Sopenharmony_cistatic struct kthread_worker *erofs_init_percpu_worker(int cpu) 35962306a36Sopenharmony_ci{ 36062306a36Sopenharmony_ci struct kthread_worker *worker = 36162306a36Sopenharmony_ci kthread_create_worker_on_cpu(cpu, 0, "erofs_worker/%u", cpu); 36262306a36Sopenharmony_ci 36362306a36Sopenharmony_ci if (IS_ERR(worker)) 36462306a36Sopenharmony_ci return worker; 36562306a36Sopenharmony_ci if (IS_ENABLED(CONFIG_EROFS_FS_PCPU_KTHREAD_HIPRI)) 36662306a36Sopenharmony_ci sched_set_fifo_low(worker->task); 36762306a36Sopenharmony_ci return worker; 36862306a36Sopenharmony_ci} 36962306a36Sopenharmony_ci 37062306a36Sopenharmony_cistatic int erofs_init_percpu_workers(void) 37162306a36Sopenharmony_ci{ 37262306a36Sopenharmony_ci struct kthread_worker *worker; 37362306a36Sopenharmony_ci unsigned int cpu; 37462306a36Sopenharmony_ci 37562306a36Sopenharmony_ci z_erofs_pcpu_workers = kcalloc(num_possible_cpus(), 37662306a36Sopenharmony_ci sizeof(struct kthread_worker *), GFP_ATOMIC); 37762306a36Sopenharmony_ci if (!z_erofs_pcpu_workers) 37862306a36Sopenharmony_ci return -ENOMEM; 37962306a36Sopenharmony_ci 38062306a36Sopenharmony_ci for_each_online_cpu(cpu) { /* could miss cpu{off,on}line? */ 38162306a36Sopenharmony_ci worker = erofs_init_percpu_worker(cpu); 38262306a36Sopenharmony_ci if (!IS_ERR(worker)) 38362306a36Sopenharmony_ci rcu_assign_pointer(z_erofs_pcpu_workers[cpu], worker); 38462306a36Sopenharmony_ci } 38562306a36Sopenharmony_ci return 0; 38662306a36Sopenharmony_ci} 38762306a36Sopenharmony_ci#else 38862306a36Sopenharmony_cistatic inline void erofs_destroy_percpu_workers(void) {} 38962306a36Sopenharmony_cistatic inline int erofs_init_percpu_workers(void) { return 0; } 39062306a36Sopenharmony_ci#endif 39162306a36Sopenharmony_ci 39262306a36Sopenharmony_ci#if defined(CONFIG_HOTPLUG_CPU) && defined(CONFIG_EROFS_FS_PCPU_KTHREAD) 39362306a36Sopenharmony_cistatic DEFINE_SPINLOCK(z_erofs_pcpu_worker_lock); 39462306a36Sopenharmony_cistatic enum cpuhp_state erofs_cpuhp_state; 39562306a36Sopenharmony_ci 39662306a36Sopenharmony_cistatic int erofs_cpu_online(unsigned int cpu) 39762306a36Sopenharmony_ci{ 39862306a36Sopenharmony_ci struct kthread_worker *worker, *old; 39962306a36Sopenharmony_ci 40062306a36Sopenharmony_ci worker = erofs_init_percpu_worker(cpu); 40162306a36Sopenharmony_ci if (IS_ERR(worker)) 40262306a36Sopenharmony_ci return PTR_ERR(worker); 40362306a36Sopenharmony_ci 40462306a36Sopenharmony_ci spin_lock(&z_erofs_pcpu_worker_lock); 40562306a36Sopenharmony_ci old = rcu_dereference_protected(z_erofs_pcpu_workers[cpu], 40662306a36Sopenharmony_ci lockdep_is_held(&z_erofs_pcpu_worker_lock)); 40762306a36Sopenharmony_ci if (!old) 40862306a36Sopenharmony_ci rcu_assign_pointer(z_erofs_pcpu_workers[cpu], worker); 40962306a36Sopenharmony_ci spin_unlock(&z_erofs_pcpu_worker_lock); 41062306a36Sopenharmony_ci if (old) 41162306a36Sopenharmony_ci kthread_destroy_worker(worker); 41262306a36Sopenharmony_ci return 0; 41362306a36Sopenharmony_ci} 41462306a36Sopenharmony_ci 41562306a36Sopenharmony_cistatic int erofs_cpu_offline(unsigned int cpu) 41662306a36Sopenharmony_ci{ 41762306a36Sopenharmony_ci struct kthread_worker *worker; 41862306a36Sopenharmony_ci 41962306a36Sopenharmony_ci spin_lock(&z_erofs_pcpu_worker_lock); 42062306a36Sopenharmony_ci worker = rcu_dereference_protected(z_erofs_pcpu_workers[cpu], 42162306a36Sopenharmony_ci lockdep_is_held(&z_erofs_pcpu_worker_lock)); 42262306a36Sopenharmony_ci rcu_assign_pointer(z_erofs_pcpu_workers[cpu], NULL); 42362306a36Sopenharmony_ci spin_unlock(&z_erofs_pcpu_worker_lock); 42462306a36Sopenharmony_ci 42562306a36Sopenharmony_ci synchronize_rcu(); 42662306a36Sopenharmony_ci if (worker) 42762306a36Sopenharmony_ci kthread_destroy_worker(worker); 42862306a36Sopenharmony_ci return 0; 42962306a36Sopenharmony_ci} 43062306a36Sopenharmony_ci 43162306a36Sopenharmony_cistatic int erofs_cpu_hotplug_init(void) 43262306a36Sopenharmony_ci{ 43362306a36Sopenharmony_ci int state; 43462306a36Sopenharmony_ci 43562306a36Sopenharmony_ci state = cpuhp_setup_state_nocalls(CPUHP_AP_ONLINE_DYN, 43662306a36Sopenharmony_ci "fs/erofs:online", erofs_cpu_online, erofs_cpu_offline); 43762306a36Sopenharmony_ci if (state < 0) 43862306a36Sopenharmony_ci return state; 43962306a36Sopenharmony_ci 44062306a36Sopenharmony_ci erofs_cpuhp_state = state; 44162306a36Sopenharmony_ci return 0; 44262306a36Sopenharmony_ci} 44362306a36Sopenharmony_ci 44462306a36Sopenharmony_cistatic void erofs_cpu_hotplug_destroy(void) 44562306a36Sopenharmony_ci{ 44662306a36Sopenharmony_ci if (erofs_cpuhp_state) 44762306a36Sopenharmony_ci cpuhp_remove_state_nocalls(erofs_cpuhp_state); 44862306a36Sopenharmony_ci} 44962306a36Sopenharmony_ci#else /* !CONFIG_HOTPLUG_CPU || !CONFIG_EROFS_FS_PCPU_KTHREAD */ 45062306a36Sopenharmony_cistatic inline int erofs_cpu_hotplug_init(void) { return 0; } 45162306a36Sopenharmony_cistatic inline void erofs_cpu_hotplug_destroy(void) {} 45262306a36Sopenharmony_ci#endif 45362306a36Sopenharmony_ci 45462306a36Sopenharmony_civoid z_erofs_exit_zip_subsystem(void) 45562306a36Sopenharmony_ci{ 45662306a36Sopenharmony_ci erofs_cpu_hotplug_destroy(); 45762306a36Sopenharmony_ci erofs_destroy_percpu_workers(); 45862306a36Sopenharmony_ci destroy_workqueue(z_erofs_workqueue); 45962306a36Sopenharmony_ci z_erofs_destroy_pcluster_pool(); 46062306a36Sopenharmony_ci} 46162306a36Sopenharmony_ci 46262306a36Sopenharmony_ciint __init z_erofs_init_zip_subsystem(void) 46362306a36Sopenharmony_ci{ 46462306a36Sopenharmony_ci int err = z_erofs_create_pcluster_pool(); 46562306a36Sopenharmony_ci 46662306a36Sopenharmony_ci if (err) 46762306a36Sopenharmony_ci goto out_error_pcluster_pool; 46862306a36Sopenharmony_ci 46962306a36Sopenharmony_ci z_erofs_workqueue = alloc_workqueue("erofs_worker", 47062306a36Sopenharmony_ci WQ_UNBOUND | WQ_HIGHPRI, num_possible_cpus()); 47162306a36Sopenharmony_ci if (!z_erofs_workqueue) { 47262306a36Sopenharmony_ci err = -ENOMEM; 47362306a36Sopenharmony_ci goto out_error_workqueue_init; 47462306a36Sopenharmony_ci } 47562306a36Sopenharmony_ci 47662306a36Sopenharmony_ci err = erofs_init_percpu_workers(); 47762306a36Sopenharmony_ci if (err) 47862306a36Sopenharmony_ci goto out_error_pcpu_worker; 47962306a36Sopenharmony_ci 48062306a36Sopenharmony_ci err = erofs_cpu_hotplug_init(); 48162306a36Sopenharmony_ci if (err < 0) 48262306a36Sopenharmony_ci goto out_error_cpuhp_init; 48362306a36Sopenharmony_ci return err; 48462306a36Sopenharmony_ci 48562306a36Sopenharmony_ciout_error_cpuhp_init: 48662306a36Sopenharmony_ci erofs_destroy_percpu_workers(); 48762306a36Sopenharmony_ciout_error_pcpu_worker: 48862306a36Sopenharmony_ci destroy_workqueue(z_erofs_workqueue); 48962306a36Sopenharmony_ciout_error_workqueue_init: 49062306a36Sopenharmony_ci z_erofs_destroy_pcluster_pool(); 49162306a36Sopenharmony_ciout_error_pcluster_pool: 49262306a36Sopenharmony_ci return err; 49362306a36Sopenharmony_ci} 49462306a36Sopenharmony_ci 49562306a36Sopenharmony_cienum z_erofs_pclustermode { 49662306a36Sopenharmony_ci Z_EROFS_PCLUSTER_INFLIGHT, 49762306a36Sopenharmony_ci /* 49862306a36Sopenharmony_ci * a weak form of Z_EROFS_PCLUSTER_FOLLOWED, the difference is that it 49962306a36Sopenharmony_ci * could be dispatched into bypass queue later due to uptodated managed 50062306a36Sopenharmony_ci * pages. All related online pages cannot be reused for inplace I/O (or 50162306a36Sopenharmony_ci * bvpage) since it can be directly decoded without I/O submission. 50262306a36Sopenharmony_ci */ 50362306a36Sopenharmony_ci Z_EROFS_PCLUSTER_FOLLOWED_NOINPLACE, 50462306a36Sopenharmony_ci /* 50562306a36Sopenharmony_ci * The pcluster was just linked to a decompression chain by us. It can 50662306a36Sopenharmony_ci * also be linked with the remaining pclusters, which means if the 50762306a36Sopenharmony_ci * processing page is the tail page of a pcluster, this pcluster can 50862306a36Sopenharmony_ci * safely use the whole page (since the previous pcluster is within the 50962306a36Sopenharmony_ci * same chain) for in-place I/O, as illustrated below: 51062306a36Sopenharmony_ci * ___________________________________________________ 51162306a36Sopenharmony_ci * | tail (partial) page | head (partial) page | 51262306a36Sopenharmony_ci * | (of the current pcl) | (of the previous pcl) | 51362306a36Sopenharmony_ci * |___PCLUSTER_FOLLOWED___|_____PCLUSTER_FOLLOWED_____| 51462306a36Sopenharmony_ci * 51562306a36Sopenharmony_ci * [ (*) the page above can be used as inplace I/O. ] 51662306a36Sopenharmony_ci */ 51762306a36Sopenharmony_ci Z_EROFS_PCLUSTER_FOLLOWED, 51862306a36Sopenharmony_ci}; 51962306a36Sopenharmony_ci 52062306a36Sopenharmony_cistruct z_erofs_decompress_frontend { 52162306a36Sopenharmony_ci struct inode *const inode; 52262306a36Sopenharmony_ci struct erofs_map_blocks map; 52362306a36Sopenharmony_ci struct z_erofs_bvec_iter biter; 52462306a36Sopenharmony_ci 52562306a36Sopenharmony_ci struct page *pagepool; 52662306a36Sopenharmony_ci struct page *candidate_bvpage; 52762306a36Sopenharmony_ci struct z_erofs_pcluster *pcl; 52862306a36Sopenharmony_ci z_erofs_next_pcluster_t owned_head; 52962306a36Sopenharmony_ci enum z_erofs_pclustermode mode; 53062306a36Sopenharmony_ci 53162306a36Sopenharmony_ci erofs_off_t headoffset; 53262306a36Sopenharmony_ci 53362306a36Sopenharmony_ci /* a pointer used to pick up inplace I/O pages */ 53462306a36Sopenharmony_ci unsigned int icur; 53562306a36Sopenharmony_ci}; 53662306a36Sopenharmony_ci 53762306a36Sopenharmony_ci#define DECOMPRESS_FRONTEND_INIT(__i) { \ 53862306a36Sopenharmony_ci .inode = __i, .owned_head = Z_EROFS_PCLUSTER_TAIL, \ 53962306a36Sopenharmony_ci .mode = Z_EROFS_PCLUSTER_FOLLOWED } 54062306a36Sopenharmony_ci 54162306a36Sopenharmony_cistatic bool z_erofs_should_alloc_cache(struct z_erofs_decompress_frontend *fe) 54262306a36Sopenharmony_ci{ 54362306a36Sopenharmony_ci unsigned int cachestrategy = EROFS_I_SB(fe->inode)->opt.cache_strategy; 54462306a36Sopenharmony_ci 54562306a36Sopenharmony_ci if (cachestrategy <= EROFS_ZIP_CACHE_DISABLED) 54662306a36Sopenharmony_ci return false; 54762306a36Sopenharmony_ci 54862306a36Sopenharmony_ci if (!(fe->map.m_flags & EROFS_MAP_FULL_MAPPED)) 54962306a36Sopenharmony_ci return true; 55062306a36Sopenharmony_ci 55162306a36Sopenharmony_ci if (cachestrategy >= EROFS_ZIP_CACHE_READAROUND && 55262306a36Sopenharmony_ci fe->map.m_la < fe->headoffset) 55362306a36Sopenharmony_ci return true; 55462306a36Sopenharmony_ci 55562306a36Sopenharmony_ci return false; 55662306a36Sopenharmony_ci} 55762306a36Sopenharmony_ci 55862306a36Sopenharmony_cistatic void z_erofs_bind_cache(struct z_erofs_decompress_frontend *fe) 55962306a36Sopenharmony_ci{ 56062306a36Sopenharmony_ci struct address_space *mc = MNGD_MAPPING(EROFS_I_SB(fe->inode)); 56162306a36Sopenharmony_ci struct z_erofs_pcluster *pcl = fe->pcl; 56262306a36Sopenharmony_ci bool shouldalloc = z_erofs_should_alloc_cache(fe); 56362306a36Sopenharmony_ci bool standalone = true; 56462306a36Sopenharmony_ci /* 56562306a36Sopenharmony_ci * optimistic allocation without direct reclaim since inplace I/O 56662306a36Sopenharmony_ci * can be used if low memory otherwise. 56762306a36Sopenharmony_ci */ 56862306a36Sopenharmony_ci gfp_t gfp = (mapping_gfp_mask(mc) & ~__GFP_DIRECT_RECLAIM) | 56962306a36Sopenharmony_ci __GFP_NOMEMALLOC | __GFP_NORETRY | __GFP_NOWARN; 57062306a36Sopenharmony_ci unsigned int i; 57162306a36Sopenharmony_ci 57262306a36Sopenharmony_ci if (fe->mode < Z_EROFS_PCLUSTER_FOLLOWED) 57362306a36Sopenharmony_ci return; 57462306a36Sopenharmony_ci 57562306a36Sopenharmony_ci for (i = 0; i < pcl->pclusterpages; ++i) { 57662306a36Sopenharmony_ci struct page *page; 57762306a36Sopenharmony_ci void *t; /* mark pages just found for debugging */ 57862306a36Sopenharmony_ci struct page *newpage = NULL; 57962306a36Sopenharmony_ci 58062306a36Sopenharmony_ci /* the compressed page was loaded before */ 58162306a36Sopenharmony_ci if (READ_ONCE(pcl->compressed_bvecs[i].page)) 58262306a36Sopenharmony_ci continue; 58362306a36Sopenharmony_ci 58462306a36Sopenharmony_ci page = find_get_page(mc, pcl->obj.index + i); 58562306a36Sopenharmony_ci 58662306a36Sopenharmony_ci if (page) { 58762306a36Sopenharmony_ci t = (void *)((unsigned long)page | 1); 58862306a36Sopenharmony_ci } else { 58962306a36Sopenharmony_ci /* I/O is needed, no possible to decompress directly */ 59062306a36Sopenharmony_ci standalone = false; 59162306a36Sopenharmony_ci if (!shouldalloc) 59262306a36Sopenharmony_ci continue; 59362306a36Sopenharmony_ci 59462306a36Sopenharmony_ci /* 59562306a36Sopenharmony_ci * try to use cached I/O if page allocation 59662306a36Sopenharmony_ci * succeeds or fallback to in-place I/O instead 59762306a36Sopenharmony_ci * to avoid any direct reclaim. 59862306a36Sopenharmony_ci */ 59962306a36Sopenharmony_ci newpage = erofs_allocpage(&fe->pagepool, gfp); 60062306a36Sopenharmony_ci if (!newpage) 60162306a36Sopenharmony_ci continue; 60262306a36Sopenharmony_ci set_page_private(newpage, Z_EROFS_PREALLOCATED_PAGE); 60362306a36Sopenharmony_ci t = (void *)((unsigned long)newpage | 1); 60462306a36Sopenharmony_ci } 60562306a36Sopenharmony_ci 60662306a36Sopenharmony_ci if (!cmpxchg_relaxed(&pcl->compressed_bvecs[i].page, NULL, t)) 60762306a36Sopenharmony_ci continue; 60862306a36Sopenharmony_ci 60962306a36Sopenharmony_ci if (page) 61062306a36Sopenharmony_ci put_page(page); 61162306a36Sopenharmony_ci else if (newpage) 61262306a36Sopenharmony_ci erofs_pagepool_add(&fe->pagepool, newpage); 61362306a36Sopenharmony_ci } 61462306a36Sopenharmony_ci 61562306a36Sopenharmony_ci /* 61662306a36Sopenharmony_ci * don't do inplace I/O if all compressed pages are available in 61762306a36Sopenharmony_ci * managed cache since it can be moved to the bypass queue instead. 61862306a36Sopenharmony_ci */ 61962306a36Sopenharmony_ci if (standalone) 62062306a36Sopenharmony_ci fe->mode = Z_EROFS_PCLUSTER_FOLLOWED_NOINPLACE; 62162306a36Sopenharmony_ci} 62262306a36Sopenharmony_ci 62362306a36Sopenharmony_ci/* called by erofs_shrinker to get rid of all compressed_pages */ 62462306a36Sopenharmony_ciint erofs_try_to_free_all_cached_pages(struct erofs_sb_info *sbi, 62562306a36Sopenharmony_ci struct erofs_workgroup *grp) 62662306a36Sopenharmony_ci{ 62762306a36Sopenharmony_ci struct z_erofs_pcluster *const pcl = 62862306a36Sopenharmony_ci container_of(grp, struct z_erofs_pcluster, obj); 62962306a36Sopenharmony_ci int i; 63062306a36Sopenharmony_ci 63162306a36Sopenharmony_ci DBG_BUGON(z_erofs_is_inline_pcluster(pcl)); 63262306a36Sopenharmony_ci /* 63362306a36Sopenharmony_ci * refcount of workgroup is now freezed as 0, 63462306a36Sopenharmony_ci * therefore no need to worry about available decompression users. 63562306a36Sopenharmony_ci */ 63662306a36Sopenharmony_ci for (i = 0; i < pcl->pclusterpages; ++i) { 63762306a36Sopenharmony_ci struct page *page = pcl->compressed_bvecs[i].page; 63862306a36Sopenharmony_ci 63962306a36Sopenharmony_ci if (!page) 64062306a36Sopenharmony_ci continue; 64162306a36Sopenharmony_ci 64262306a36Sopenharmony_ci /* block other users from reclaiming or migrating the page */ 64362306a36Sopenharmony_ci if (!trylock_page(page)) 64462306a36Sopenharmony_ci return -EBUSY; 64562306a36Sopenharmony_ci 64662306a36Sopenharmony_ci if (!erofs_page_is_managed(sbi, page)) 64762306a36Sopenharmony_ci continue; 64862306a36Sopenharmony_ci 64962306a36Sopenharmony_ci /* barrier is implied in the following 'unlock_page' */ 65062306a36Sopenharmony_ci WRITE_ONCE(pcl->compressed_bvecs[i].page, NULL); 65162306a36Sopenharmony_ci detach_page_private(page); 65262306a36Sopenharmony_ci unlock_page(page); 65362306a36Sopenharmony_ci } 65462306a36Sopenharmony_ci return 0; 65562306a36Sopenharmony_ci} 65662306a36Sopenharmony_ci 65762306a36Sopenharmony_cistatic bool z_erofs_cache_release_folio(struct folio *folio, gfp_t gfp) 65862306a36Sopenharmony_ci{ 65962306a36Sopenharmony_ci struct z_erofs_pcluster *pcl = folio_get_private(folio); 66062306a36Sopenharmony_ci bool ret; 66162306a36Sopenharmony_ci int i; 66262306a36Sopenharmony_ci 66362306a36Sopenharmony_ci if (!folio_test_private(folio)) 66462306a36Sopenharmony_ci return true; 66562306a36Sopenharmony_ci 66662306a36Sopenharmony_ci ret = false; 66762306a36Sopenharmony_ci spin_lock(&pcl->obj.lockref.lock); 66862306a36Sopenharmony_ci if (pcl->obj.lockref.count > 0) 66962306a36Sopenharmony_ci goto out; 67062306a36Sopenharmony_ci 67162306a36Sopenharmony_ci DBG_BUGON(z_erofs_is_inline_pcluster(pcl)); 67262306a36Sopenharmony_ci for (i = 0; i < pcl->pclusterpages; ++i) { 67362306a36Sopenharmony_ci if (pcl->compressed_bvecs[i].page == &folio->page) { 67462306a36Sopenharmony_ci WRITE_ONCE(pcl->compressed_bvecs[i].page, NULL); 67562306a36Sopenharmony_ci ret = true; 67662306a36Sopenharmony_ci break; 67762306a36Sopenharmony_ci } 67862306a36Sopenharmony_ci } 67962306a36Sopenharmony_ci if (ret) 68062306a36Sopenharmony_ci folio_detach_private(folio); 68162306a36Sopenharmony_ciout: 68262306a36Sopenharmony_ci spin_unlock(&pcl->obj.lockref.lock); 68362306a36Sopenharmony_ci return ret; 68462306a36Sopenharmony_ci} 68562306a36Sopenharmony_ci 68662306a36Sopenharmony_ci/* 68762306a36Sopenharmony_ci * It will be called only on inode eviction. In case that there are still some 68862306a36Sopenharmony_ci * decompression requests in progress, wait with rescheduling for a bit here. 68962306a36Sopenharmony_ci * An extra lock could be introduced instead but it seems unnecessary. 69062306a36Sopenharmony_ci */ 69162306a36Sopenharmony_cistatic void z_erofs_cache_invalidate_folio(struct folio *folio, 69262306a36Sopenharmony_ci size_t offset, size_t length) 69362306a36Sopenharmony_ci{ 69462306a36Sopenharmony_ci const size_t stop = length + offset; 69562306a36Sopenharmony_ci 69662306a36Sopenharmony_ci /* Check for potential overflow in debug mode */ 69762306a36Sopenharmony_ci DBG_BUGON(stop > folio_size(folio) || stop < length); 69862306a36Sopenharmony_ci 69962306a36Sopenharmony_ci if (offset == 0 && stop == folio_size(folio)) 70062306a36Sopenharmony_ci while (!z_erofs_cache_release_folio(folio, GFP_NOFS)) 70162306a36Sopenharmony_ci cond_resched(); 70262306a36Sopenharmony_ci} 70362306a36Sopenharmony_ci 70462306a36Sopenharmony_cistatic const struct address_space_operations z_erofs_cache_aops = { 70562306a36Sopenharmony_ci .release_folio = z_erofs_cache_release_folio, 70662306a36Sopenharmony_ci .invalidate_folio = z_erofs_cache_invalidate_folio, 70762306a36Sopenharmony_ci}; 70862306a36Sopenharmony_ci 70962306a36Sopenharmony_ciint erofs_init_managed_cache(struct super_block *sb) 71062306a36Sopenharmony_ci{ 71162306a36Sopenharmony_ci struct inode *const inode = new_inode(sb); 71262306a36Sopenharmony_ci 71362306a36Sopenharmony_ci if (!inode) 71462306a36Sopenharmony_ci return -ENOMEM; 71562306a36Sopenharmony_ci 71662306a36Sopenharmony_ci set_nlink(inode, 1); 71762306a36Sopenharmony_ci inode->i_size = OFFSET_MAX; 71862306a36Sopenharmony_ci inode->i_mapping->a_ops = &z_erofs_cache_aops; 71962306a36Sopenharmony_ci mapping_set_gfp_mask(inode->i_mapping, GFP_NOFS); 72062306a36Sopenharmony_ci EROFS_SB(sb)->managed_cache = inode; 72162306a36Sopenharmony_ci return 0; 72262306a36Sopenharmony_ci} 72362306a36Sopenharmony_ci 72462306a36Sopenharmony_cistatic bool z_erofs_try_inplace_io(struct z_erofs_decompress_frontend *fe, 72562306a36Sopenharmony_ci struct z_erofs_bvec *bvec) 72662306a36Sopenharmony_ci{ 72762306a36Sopenharmony_ci struct z_erofs_pcluster *const pcl = fe->pcl; 72862306a36Sopenharmony_ci 72962306a36Sopenharmony_ci while (fe->icur > 0) { 73062306a36Sopenharmony_ci if (!cmpxchg(&pcl->compressed_bvecs[--fe->icur].page, 73162306a36Sopenharmony_ci NULL, bvec->page)) { 73262306a36Sopenharmony_ci pcl->compressed_bvecs[fe->icur] = *bvec; 73362306a36Sopenharmony_ci return true; 73462306a36Sopenharmony_ci } 73562306a36Sopenharmony_ci } 73662306a36Sopenharmony_ci return false; 73762306a36Sopenharmony_ci} 73862306a36Sopenharmony_ci 73962306a36Sopenharmony_ci/* callers must be with pcluster lock held */ 74062306a36Sopenharmony_cistatic int z_erofs_attach_page(struct z_erofs_decompress_frontend *fe, 74162306a36Sopenharmony_ci struct z_erofs_bvec *bvec, bool exclusive) 74262306a36Sopenharmony_ci{ 74362306a36Sopenharmony_ci int ret; 74462306a36Sopenharmony_ci 74562306a36Sopenharmony_ci if (exclusive) { 74662306a36Sopenharmony_ci /* give priority for inplaceio to use file pages first */ 74762306a36Sopenharmony_ci if (z_erofs_try_inplace_io(fe, bvec)) 74862306a36Sopenharmony_ci return 0; 74962306a36Sopenharmony_ci /* otherwise, check if it can be used as a bvpage */ 75062306a36Sopenharmony_ci if (fe->mode >= Z_EROFS_PCLUSTER_FOLLOWED && 75162306a36Sopenharmony_ci !fe->candidate_bvpage) 75262306a36Sopenharmony_ci fe->candidate_bvpage = bvec->page; 75362306a36Sopenharmony_ci } 75462306a36Sopenharmony_ci ret = z_erofs_bvec_enqueue(&fe->biter, bvec, &fe->candidate_bvpage, 75562306a36Sopenharmony_ci &fe->pagepool); 75662306a36Sopenharmony_ci fe->pcl->vcnt += (ret >= 0); 75762306a36Sopenharmony_ci return ret; 75862306a36Sopenharmony_ci} 75962306a36Sopenharmony_ci 76062306a36Sopenharmony_cistatic void z_erofs_try_to_claim_pcluster(struct z_erofs_decompress_frontend *f) 76162306a36Sopenharmony_ci{ 76262306a36Sopenharmony_ci struct z_erofs_pcluster *pcl = f->pcl; 76362306a36Sopenharmony_ci z_erofs_next_pcluster_t *owned_head = &f->owned_head; 76462306a36Sopenharmony_ci 76562306a36Sopenharmony_ci /* type 1, nil pcluster (this pcluster doesn't belong to any chain.) */ 76662306a36Sopenharmony_ci if (cmpxchg(&pcl->next, Z_EROFS_PCLUSTER_NIL, 76762306a36Sopenharmony_ci *owned_head) == Z_EROFS_PCLUSTER_NIL) { 76862306a36Sopenharmony_ci *owned_head = &pcl->next; 76962306a36Sopenharmony_ci /* so we can attach this pcluster to our submission chain. */ 77062306a36Sopenharmony_ci f->mode = Z_EROFS_PCLUSTER_FOLLOWED; 77162306a36Sopenharmony_ci return; 77262306a36Sopenharmony_ci } 77362306a36Sopenharmony_ci 77462306a36Sopenharmony_ci /* type 2, it belongs to an ongoing chain */ 77562306a36Sopenharmony_ci f->mode = Z_EROFS_PCLUSTER_INFLIGHT; 77662306a36Sopenharmony_ci} 77762306a36Sopenharmony_ci 77862306a36Sopenharmony_cistatic int z_erofs_register_pcluster(struct z_erofs_decompress_frontend *fe) 77962306a36Sopenharmony_ci{ 78062306a36Sopenharmony_ci struct erofs_map_blocks *map = &fe->map; 78162306a36Sopenharmony_ci bool ztailpacking = map->m_flags & EROFS_MAP_META; 78262306a36Sopenharmony_ci struct z_erofs_pcluster *pcl; 78362306a36Sopenharmony_ci struct erofs_workgroup *grp; 78462306a36Sopenharmony_ci int err; 78562306a36Sopenharmony_ci 78662306a36Sopenharmony_ci if (!(map->m_flags & EROFS_MAP_ENCODED) || 78762306a36Sopenharmony_ci (!ztailpacking && !(map->m_pa >> PAGE_SHIFT))) { 78862306a36Sopenharmony_ci DBG_BUGON(1); 78962306a36Sopenharmony_ci return -EFSCORRUPTED; 79062306a36Sopenharmony_ci } 79162306a36Sopenharmony_ci 79262306a36Sopenharmony_ci /* no available pcluster, let's allocate one */ 79362306a36Sopenharmony_ci pcl = z_erofs_alloc_pcluster(ztailpacking ? 1 : 79462306a36Sopenharmony_ci map->m_plen >> PAGE_SHIFT); 79562306a36Sopenharmony_ci if (IS_ERR(pcl)) 79662306a36Sopenharmony_ci return PTR_ERR(pcl); 79762306a36Sopenharmony_ci 79862306a36Sopenharmony_ci spin_lock_init(&pcl->obj.lockref.lock); 79962306a36Sopenharmony_ci pcl->obj.lockref.count = 1; /* one ref for this request */ 80062306a36Sopenharmony_ci pcl->algorithmformat = map->m_algorithmformat; 80162306a36Sopenharmony_ci pcl->length = 0; 80262306a36Sopenharmony_ci pcl->partial = true; 80362306a36Sopenharmony_ci 80462306a36Sopenharmony_ci /* new pclusters should be claimed as type 1, primary and followed */ 80562306a36Sopenharmony_ci pcl->next = fe->owned_head; 80662306a36Sopenharmony_ci pcl->pageofs_out = map->m_la & ~PAGE_MASK; 80762306a36Sopenharmony_ci fe->mode = Z_EROFS_PCLUSTER_FOLLOWED; 80862306a36Sopenharmony_ci 80962306a36Sopenharmony_ci /* 81062306a36Sopenharmony_ci * lock all primary followed works before visible to others 81162306a36Sopenharmony_ci * and mutex_trylock *never* fails for a new pcluster. 81262306a36Sopenharmony_ci */ 81362306a36Sopenharmony_ci mutex_init(&pcl->lock); 81462306a36Sopenharmony_ci DBG_BUGON(!mutex_trylock(&pcl->lock)); 81562306a36Sopenharmony_ci 81662306a36Sopenharmony_ci if (ztailpacking) { 81762306a36Sopenharmony_ci pcl->obj.index = 0; /* which indicates ztailpacking */ 81862306a36Sopenharmony_ci pcl->tailpacking_size = map->m_plen; 81962306a36Sopenharmony_ci } else { 82062306a36Sopenharmony_ci pcl->obj.index = map->m_pa >> PAGE_SHIFT; 82162306a36Sopenharmony_ci 82262306a36Sopenharmony_ci grp = erofs_insert_workgroup(fe->inode->i_sb, &pcl->obj); 82362306a36Sopenharmony_ci if (IS_ERR(grp)) { 82462306a36Sopenharmony_ci err = PTR_ERR(grp); 82562306a36Sopenharmony_ci goto err_out; 82662306a36Sopenharmony_ci } 82762306a36Sopenharmony_ci 82862306a36Sopenharmony_ci if (grp != &pcl->obj) { 82962306a36Sopenharmony_ci fe->pcl = container_of(grp, 83062306a36Sopenharmony_ci struct z_erofs_pcluster, obj); 83162306a36Sopenharmony_ci err = -EEXIST; 83262306a36Sopenharmony_ci goto err_out; 83362306a36Sopenharmony_ci } 83462306a36Sopenharmony_ci } 83562306a36Sopenharmony_ci fe->owned_head = &pcl->next; 83662306a36Sopenharmony_ci fe->pcl = pcl; 83762306a36Sopenharmony_ci return 0; 83862306a36Sopenharmony_ci 83962306a36Sopenharmony_cierr_out: 84062306a36Sopenharmony_ci mutex_unlock(&pcl->lock); 84162306a36Sopenharmony_ci z_erofs_free_pcluster(pcl); 84262306a36Sopenharmony_ci return err; 84362306a36Sopenharmony_ci} 84462306a36Sopenharmony_ci 84562306a36Sopenharmony_cistatic int z_erofs_pcluster_begin(struct z_erofs_decompress_frontend *fe) 84662306a36Sopenharmony_ci{ 84762306a36Sopenharmony_ci struct erofs_map_blocks *map = &fe->map; 84862306a36Sopenharmony_ci struct super_block *sb = fe->inode->i_sb; 84962306a36Sopenharmony_ci erofs_blk_t blknr = erofs_blknr(sb, map->m_pa); 85062306a36Sopenharmony_ci struct erofs_workgroup *grp = NULL; 85162306a36Sopenharmony_ci int ret; 85262306a36Sopenharmony_ci 85362306a36Sopenharmony_ci DBG_BUGON(fe->pcl); 85462306a36Sopenharmony_ci 85562306a36Sopenharmony_ci /* must be Z_EROFS_PCLUSTER_TAIL or pointed to previous pcluster */ 85662306a36Sopenharmony_ci DBG_BUGON(fe->owned_head == Z_EROFS_PCLUSTER_NIL); 85762306a36Sopenharmony_ci 85862306a36Sopenharmony_ci if (!(map->m_flags & EROFS_MAP_META)) { 85962306a36Sopenharmony_ci grp = erofs_find_workgroup(sb, blknr); 86062306a36Sopenharmony_ci } else if ((map->m_pa & ~PAGE_MASK) + map->m_plen > PAGE_SIZE) { 86162306a36Sopenharmony_ci DBG_BUGON(1); 86262306a36Sopenharmony_ci return -EFSCORRUPTED; 86362306a36Sopenharmony_ci } 86462306a36Sopenharmony_ci 86562306a36Sopenharmony_ci if (grp) { 86662306a36Sopenharmony_ci fe->pcl = container_of(grp, struct z_erofs_pcluster, obj); 86762306a36Sopenharmony_ci ret = -EEXIST; 86862306a36Sopenharmony_ci } else { 86962306a36Sopenharmony_ci ret = z_erofs_register_pcluster(fe); 87062306a36Sopenharmony_ci } 87162306a36Sopenharmony_ci 87262306a36Sopenharmony_ci if (ret == -EEXIST) { 87362306a36Sopenharmony_ci mutex_lock(&fe->pcl->lock); 87462306a36Sopenharmony_ci z_erofs_try_to_claim_pcluster(fe); 87562306a36Sopenharmony_ci } else if (ret) { 87662306a36Sopenharmony_ci return ret; 87762306a36Sopenharmony_ci } 87862306a36Sopenharmony_ci 87962306a36Sopenharmony_ci z_erofs_bvec_iter_begin(&fe->biter, &fe->pcl->bvset, 88062306a36Sopenharmony_ci Z_EROFS_INLINE_BVECS, fe->pcl->vcnt); 88162306a36Sopenharmony_ci if (!z_erofs_is_inline_pcluster(fe->pcl)) { 88262306a36Sopenharmony_ci /* bind cache first when cached decompression is preferred */ 88362306a36Sopenharmony_ci z_erofs_bind_cache(fe); 88462306a36Sopenharmony_ci } else { 88562306a36Sopenharmony_ci void *mptr; 88662306a36Sopenharmony_ci 88762306a36Sopenharmony_ci mptr = erofs_read_metabuf(&map->buf, sb, blknr, EROFS_NO_KMAP); 88862306a36Sopenharmony_ci if (IS_ERR(mptr)) { 88962306a36Sopenharmony_ci ret = PTR_ERR(mptr); 89062306a36Sopenharmony_ci erofs_err(sb, "failed to get inline data %d", ret); 89162306a36Sopenharmony_ci return ret; 89262306a36Sopenharmony_ci } 89362306a36Sopenharmony_ci get_page(map->buf.page); 89462306a36Sopenharmony_ci WRITE_ONCE(fe->pcl->compressed_bvecs[0].page, map->buf.page); 89562306a36Sopenharmony_ci fe->pcl->pageofs_in = map->m_pa & ~PAGE_MASK; 89662306a36Sopenharmony_ci fe->mode = Z_EROFS_PCLUSTER_FOLLOWED_NOINPLACE; 89762306a36Sopenharmony_ci } 89862306a36Sopenharmony_ci /* file-backed inplace I/O pages are traversed in reverse order */ 89962306a36Sopenharmony_ci fe->icur = z_erofs_pclusterpages(fe->pcl); 90062306a36Sopenharmony_ci return 0; 90162306a36Sopenharmony_ci} 90262306a36Sopenharmony_ci 90362306a36Sopenharmony_ci/* 90462306a36Sopenharmony_ci * keep in mind that no referenced pclusters will be freed 90562306a36Sopenharmony_ci * only after a RCU grace period. 90662306a36Sopenharmony_ci */ 90762306a36Sopenharmony_cistatic void z_erofs_rcu_callback(struct rcu_head *head) 90862306a36Sopenharmony_ci{ 90962306a36Sopenharmony_ci z_erofs_free_pcluster(container_of(head, 91062306a36Sopenharmony_ci struct z_erofs_pcluster, rcu)); 91162306a36Sopenharmony_ci} 91262306a36Sopenharmony_ci 91362306a36Sopenharmony_civoid erofs_workgroup_free_rcu(struct erofs_workgroup *grp) 91462306a36Sopenharmony_ci{ 91562306a36Sopenharmony_ci struct z_erofs_pcluster *const pcl = 91662306a36Sopenharmony_ci container_of(grp, struct z_erofs_pcluster, obj); 91762306a36Sopenharmony_ci 91862306a36Sopenharmony_ci call_rcu(&pcl->rcu, z_erofs_rcu_callback); 91962306a36Sopenharmony_ci} 92062306a36Sopenharmony_ci 92162306a36Sopenharmony_cistatic void z_erofs_pcluster_end(struct z_erofs_decompress_frontend *fe) 92262306a36Sopenharmony_ci{ 92362306a36Sopenharmony_ci struct z_erofs_pcluster *pcl = fe->pcl; 92462306a36Sopenharmony_ci 92562306a36Sopenharmony_ci if (!pcl) 92662306a36Sopenharmony_ci return; 92762306a36Sopenharmony_ci 92862306a36Sopenharmony_ci z_erofs_bvec_iter_end(&fe->biter); 92962306a36Sopenharmony_ci mutex_unlock(&pcl->lock); 93062306a36Sopenharmony_ci 93162306a36Sopenharmony_ci if (fe->candidate_bvpage) 93262306a36Sopenharmony_ci fe->candidate_bvpage = NULL; 93362306a36Sopenharmony_ci 93462306a36Sopenharmony_ci /* 93562306a36Sopenharmony_ci * if all pending pages are added, don't hold its reference 93662306a36Sopenharmony_ci * any longer if the pcluster isn't hosted by ourselves. 93762306a36Sopenharmony_ci */ 93862306a36Sopenharmony_ci if (fe->mode < Z_EROFS_PCLUSTER_FOLLOWED_NOINPLACE) 93962306a36Sopenharmony_ci erofs_workgroup_put(&pcl->obj); 94062306a36Sopenharmony_ci 94162306a36Sopenharmony_ci fe->pcl = NULL; 94262306a36Sopenharmony_ci} 94362306a36Sopenharmony_ci 94462306a36Sopenharmony_cistatic int z_erofs_read_fragment(struct super_block *sb, struct page *page, 94562306a36Sopenharmony_ci unsigned int cur, unsigned int end, erofs_off_t pos) 94662306a36Sopenharmony_ci{ 94762306a36Sopenharmony_ci struct inode *packed_inode = EROFS_SB(sb)->packed_inode; 94862306a36Sopenharmony_ci struct erofs_buf buf = __EROFS_BUF_INITIALIZER; 94962306a36Sopenharmony_ci unsigned int cnt; 95062306a36Sopenharmony_ci u8 *src; 95162306a36Sopenharmony_ci 95262306a36Sopenharmony_ci if (!packed_inode) 95362306a36Sopenharmony_ci return -EFSCORRUPTED; 95462306a36Sopenharmony_ci 95562306a36Sopenharmony_ci buf.inode = packed_inode; 95662306a36Sopenharmony_ci for (; cur < end; cur += cnt, pos += cnt) { 95762306a36Sopenharmony_ci cnt = min_t(unsigned int, end - cur, 95862306a36Sopenharmony_ci sb->s_blocksize - erofs_blkoff(sb, pos)); 95962306a36Sopenharmony_ci src = erofs_bread(&buf, erofs_blknr(sb, pos), EROFS_KMAP); 96062306a36Sopenharmony_ci if (IS_ERR(src)) { 96162306a36Sopenharmony_ci erofs_put_metabuf(&buf); 96262306a36Sopenharmony_ci return PTR_ERR(src); 96362306a36Sopenharmony_ci } 96462306a36Sopenharmony_ci memcpy_to_page(page, cur, src + erofs_blkoff(sb, pos), cnt); 96562306a36Sopenharmony_ci } 96662306a36Sopenharmony_ci erofs_put_metabuf(&buf); 96762306a36Sopenharmony_ci return 0; 96862306a36Sopenharmony_ci} 96962306a36Sopenharmony_ci 97062306a36Sopenharmony_cistatic int z_erofs_do_read_page(struct z_erofs_decompress_frontend *fe, 97162306a36Sopenharmony_ci struct page *page) 97262306a36Sopenharmony_ci{ 97362306a36Sopenharmony_ci struct inode *const inode = fe->inode; 97462306a36Sopenharmony_ci struct erofs_map_blocks *const map = &fe->map; 97562306a36Sopenharmony_ci const loff_t offset = page_offset(page); 97662306a36Sopenharmony_ci bool tight = true, exclusive; 97762306a36Sopenharmony_ci unsigned int cur, end, len, split; 97862306a36Sopenharmony_ci int err = 0; 97962306a36Sopenharmony_ci 98062306a36Sopenharmony_ci z_erofs_onlinepage_init(page); 98162306a36Sopenharmony_ci 98262306a36Sopenharmony_ci split = 0; 98362306a36Sopenharmony_ci end = PAGE_SIZE; 98462306a36Sopenharmony_cirepeat: 98562306a36Sopenharmony_ci if (offset + end - 1 < map->m_la || 98662306a36Sopenharmony_ci offset + end - 1 >= map->m_la + map->m_llen) { 98762306a36Sopenharmony_ci z_erofs_pcluster_end(fe); 98862306a36Sopenharmony_ci map->m_la = offset + end - 1; 98962306a36Sopenharmony_ci map->m_llen = 0; 99062306a36Sopenharmony_ci err = z_erofs_map_blocks_iter(inode, map, 0); 99162306a36Sopenharmony_ci if (err) 99262306a36Sopenharmony_ci goto out; 99362306a36Sopenharmony_ci } 99462306a36Sopenharmony_ci 99562306a36Sopenharmony_ci cur = offset > map->m_la ? 0 : map->m_la - offset; 99662306a36Sopenharmony_ci /* bump split parts first to avoid several separate cases */ 99762306a36Sopenharmony_ci ++split; 99862306a36Sopenharmony_ci 99962306a36Sopenharmony_ci if (!(map->m_flags & EROFS_MAP_MAPPED)) { 100062306a36Sopenharmony_ci zero_user_segment(page, cur, end); 100162306a36Sopenharmony_ci tight = false; 100262306a36Sopenharmony_ci goto next_part; 100362306a36Sopenharmony_ci } 100462306a36Sopenharmony_ci 100562306a36Sopenharmony_ci if (map->m_flags & EROFS_MAP_FRAGMENT) { 100662306a36Sopenharmony_ci erofs_off_t fpos = offset + cur - map->m_la; 100762306a36Sopenharmony_ci 100862306a36Sopenharmony_ci len = min_t(unsigned int, map->m_llen - fpos, end - cur); 100962306a36Sopenharmony_ci err = z_erofs_read_fragment(inode->i_sb, page, cur, cur + len, 101062306a36Sopenharmony_ci EROFS_I(inode)->z_fragmentoff + fpos); 101162306a36Sopenharmony_ci if (err) 101262306a36Sopenharmony_ci goto out; 101362306a36Sopenharmony_ci tight = false; 101462306a36Sopenharmony_ci goto next_part; 101562306a36Sopenharmony_ci } 101662306a36Sopenharmony_ci 101762306a36Sopenharmony_ci if (!fe->pcl) { 101862306a36Sopenharmony_ci err = z_erofs_pcluster_begin(fe); 101962306a36Sopenharmony_ci if (err) 102062306a36Sopenharmony_ci goto out; 102162306a36Sopenharmony_ci } 102262306a36Sopenharmony_ci 102362306a36Sopenharmony_ci /* 102462306a36Sopenharmony_ci * Ensure the current partial page belongs to this submit chain rather 102562306a36Sopenharmony_ci * than other concurrent submit chains or the noio(bypass) chain since 102662306a36Sopenharmony_ci * those chains are handled asynchronously thus the page cannot be used 102762306a36Sopenharmony_ci * for inplace I/O or bvpage (should be processed in a strict order.) 102862306a36Sopenharmony_ci */ 102962306a36Sopenharmony_ci tight &= (fe->mode > Z_EROFS_PCLUSTER_FOLLOWED_NOINPLACE); 103062306a36Sopenharmony_ci exclusive = (!cur && ((split <= 1) || tight)); 103162306a36Sopenharmony_ci if (cur) 103262306a36Sopenharmony_ci tight &= (fe->mode >= Z_EROFS_PCLUSTER_FOLLOWED); 103362306a36Sopenharmony_ci 103462306a36Sopenharmony_ci err = z_erofs_attach_page(fe, &((struct z_erofs_bvec) { 103562306a36Sopenharmony_ci .page = page, 103662306a36Sopenharmony_ci .offset = offset - map->m_la, 103762306a36Sopenharmony_ci .end = end, 103862306a36Sopenharmony_ci }), exclusive); 103962306a36Sopenharmony_ci if (err) 104062306a36Sopenharmony_ci goto out; 104162306a36Sopenharmony_ci 104262306a36Sopenharmony_ci z_erofs_onlinepage_split(page); 104362306a36Sopenharmony_ci if (fe->pcl->pageofs_out != (map->m_la & ~PAGE_MASK)) 104462306a36Sopenharmony_ci fe->pcl->multibases = true; 104562306a36Sopenharmony_ci if (fe->pcl->length < offset + end - map->m_la) { 104662306a36Sopenharmony_ci fe->pcl->length = offset + end - map->m_la; 104762306a36Sopenharmony_ci fe->pcl->pageofs_out = map->m_la & ~PAGE_MASK; 104862306a36Sopenharmony_ci } 104962306a36Sopenharmony_ci if ((map->m_flags & EROFS_MAP_FULL_MAPPED) && 105062306a36Sopenharmony_ci !(map->m_flags & EROFS_MAP_PARTIAL_REF) && 105162306a36Sopenharmony_ci fe->pcl->length == map->m_llen) 105262306a36Sopenharmony_ci fe->pcl->partial = false; 105362306a36Sopenharmony_cinext_part: 105462306a36Sopenharmony_ci /* shorten the remaining extent to update progress */ 105562306a36Sopenharmony_ci map->m_llen = offset + cur - map->m_la; 105662306a36Sopenharmony_ci map->m_flags &= ~EROFS_MAP_FULL_MAPPED; 105762306a36Sopenharmony_ci 105862306a36Sopenharmony_ci end = cur; 105962306a36Sopenharmony_ci if (end > 0) 106062306a36Sopenharmony_ci goto repeat; 106162306a36Sopenharmony_ci 106262306a36Sopenharmony_ciout: 106362306a36Sopenharmony_ci z_erofs_onlinepage_endio(page, err); 106462306a36Sopenharmony_ci return err; 106562306a36Sopenharmony_ci} 106662306a36Sopenharmony_ci 106762306a36Sopenharmony_cistatic bool z_erofs_is_sync_decompress(struct erofs_sb_info *sbi, 106862306a36Sopenharmony_ci unsigned int readahead_pages) 106962306a36Sopenharmony_ci{ 107062306a36Sopenharmony_ci /* auto: enable for read_folio, disable for readahead */ 107162306a36Sopenharmony_ci if ((sbi->opt.sync_decompress == EROFS_SYNC_DECOMPRESS_AUTO) && 107262306a36Sopenharmony_ci !readahead_pages) 107362306a36Sopenharmony_ci return true; 107462306a36Sopenharmony_ci 107562306a36Sopenharmony_ci if ((sbi->opt.sync_decompress == EROFS_SYNC_DECOMPRESS_FORCE_ON) && 107662306a36Sopenharmony_ci (readahead_pages <= sbi->opt.max_sync_decompress_pages)) 107762306a36Sopenharmony_ci return true; 107862306a36Sopenharmony_ci 107962306a36Sopenharmony_ci return false; 108062306a36Sopenharmony_ci} 108162306a36Sopenharmony_ci 108262306a36Sopenharmony_cistatic bool z_erofs_page_is_invalidated(struct page *page) 108362306a36Sopenharmony_ci{ 108462306a36Sopenharmony_ci return !page->mapping && !z_erofs_is_shortlived_page(page); 108562306a36Sopenharmony_ci} 108662306a36Sopenharmony_ci 108762306a36Sopenharmony_cistruct z_erofs_decompress_backend { 108862306a36Sopenharmony_ci struct page *onstack_pages[Z_EROFS_ONSTACK_PAGES]; 108962306a36Sopenharmony_ci struct super_block *sb; 109062306a36Sopenharmony_ci struct z_erofs_pcluster *pcl; 109162306a36Sopenharmony_ci 109262306a36Sopenharmony_ci /* pages with the longest decompressed length for deduplication */ 109362306a36Sopenharmony_ci struct page **decompressed_pages; 109462306a36Sopenharmony_ci /* pages to keep the compressed data */ 109562306a36Sopenharmony_ci struct page **compressed_pages; 109662306a36Sopenharmony_ci 109762306a36Sopenharmony_ci struct list_head decompressed_secondary_bvecs; 109862306a36Sopenharmony_ci struct page **pagepool; 109962306a36Sopenharmony_ci unsigned int onstack_used, nr_pages; 110062306a36Sopenharmony_ci}; 110162306a36Sopenharmony_ci 110262306a36Sopenharmony_cistruct z_erofs_bvec_item { 110362306a36Sopenharmony_ci struct z_erofs_bvec bvec; 110462306a36Sopenharmony_ci struct list_head list; 110562306a36Sopenharmony_ci}; 110662306a36Sopenharmony_ci 110762306a36Sopenharmony_cistatic void z_erofs_do_decompressed_bvec(struct z_erofs_decompress_backend *be, 110862306a36Sopenharmony_ci struct z_erofs_bvec *bvec) 110962306a36Sopenharmony_ci{ 111062306a36Sopenharmony_ci struct z_erofs_bvec_item *item; 111162306a36Sopenharmony_ci unsigned int pgnr; 111262306a36Sopenharmony_ci 111362306a36Sopenharmony_ci if (!((bvec->offset + be->pcl->pageofs_out) & ~PAGE_MASK) && 111462306a36Sopenharmony_ci (bvec->end == PAGE_SIZE || 111562306a36Sopenharmony_ci bvec->offset + bvec->end == be->pcl->length)) { 111662306a36Sopenharmony_ci pgnr = (bvec->offset + be->pcl->pageofs_out) >> PAGE_SHIFT; 111762306a36Sopenharmony_ci DBG_BUGON(pgnr >= be->nr_pages); 111862306a36Sopenharmony_ci if (!be->decompressed_pages[pgnr]) { 111962306a36Sopenharmony_ci be->decompressed_pages[pgnr] = bvec->page; 112062306a36Sopenharmony_ci return; 112162306a36Sopenharmony_ci } 112262306a36Sopenharmony_ci } 112362306a36Sopenharmony_ci 112462306a36Sopenharmony_ci /* (cold path) one pcluster is requested multiple times */ 112562306a36Sopenharmony_ci item = kmalloc(sizeof(*item), GFP_KERNEL | __GFP_NOFAIL); 112662306a36Sopenharmony_ci item->bvec = *bvec; 112762306a36Sopenharmony_ci list_add(&item->list, &be->decompressed_secondary_bvecs); 112862306a36Sopenharmony_ci} 112962306a36Sopenharmony_ci 113062306a36Sopenharmony_cistatic void z_erofs_fill_other_copies(struct z_erofs_decompress_backend *be, 113162306a36Sopenharmony_ci int err) 113262306a36Sopenharmony_ci{ 113362306a36Sopenharmony_ci unsigned int off0 = be->pcl->pageofs_out; 113462306a36Sopenharmony_ci struct list_head *p, *n; 113562306a36Sopenharmony_ci 113662306a36Sopenharmony_ci list_for_each_safe(p, n, &be->decompressed_secondary_bvecs) { 113762306a36Sopenharmony_ci struct z_erofs_bvec_item *bvi; 113862306a36Sopenharmony_ci unsigned int end, cur; 113962306a36Sopenharmony_ci void *dst, *src; 114062306a36Sopenharmony_ci 114162306a36Sopenharmony_ci bvi = container_of(p, struct z_erofs_bvec_item, list); 114262306a36Sopenharmony_ci cur = bvi->bvec.offset < 0 ? -bvi->bvec.offset : 0; 114362306a36Sopenharmony_ci end = min_t(unsigned int, be->pcl->length - bvi->bvec.offset, 114462306a36Sopenharmony_ci bvi->bvec.end); 114562306a36Sopenharmony_ci dst = kmap_local_page(bvi->bvec.page); 114662306a36Sopenharmony_ci while (cur < end) { 114762306a36Sopenharmony_ci unsigned int pgnr, scur, len; 114862306a36Sopenharmony_ci 114962306a36Sopenharmony_ci pgnr = (bvi->bvec.offset + cur + off0) >> PAGE_SHIFT; 115062306a36Sopenharmony_ci DBG_BUGON(pgnr >= be->nr_pages); 115162306a36Sopenharmony_ci 115262306a36Sopenharmony_ci scur = bvi->bvec.offset + cur - 115362306a36Sopenharmony_ci ((pgnr << PAGE_SHIFT) - off0); 115462306a36Sopenharmony_ci len = min_t(unsigned int, end - cur, PAGE_SIZE - scur); 115562306a36Sopenharmony_ci if (!be->decompressed_pages[pgnr]) { 115662306a36Sopenharmony_ci err = -EFSCORRUPTED; 115762306a36Sopenharmony_ci cur += len; 115862306a36Sopenharmony_ci continue; 115962306a36Sopenharmony_ci } 116062306a36Sopenharmony_ci src = kmap_local_page(be->decompressed_pages[pgnr]); 116162306a36Sopenharmony_ci memcpy(dst + cur, src + scur, len); 116262306a36Sopenharmony_ci kunmap_local(src); 116362306a36Sopenharmony_ci cur += len; 116462306a36Sopenharmony_ci } 116562306a36Sopenharmony_ci kunmap_local(dst); 116662306a36Sopenharmony_ci z_erofs_onlinepage_endio(bvi->bvec.page, err); 116762306a36Sopenharmony_ci list_del(p); 116862306a36Sopenharmony_ci kfree(bvi); 116962306a36Sopenharmony_ci } 117062306a36Sopenharmony_ci} 117162306a36Sopenharmony_ci 117262306a36Sopenharmony_cistatic void z_erofs_parse_out_bvecs(struct z_erofs_decompress_backend *be) 117362306a36Sopenharmony_ci{ 117462306a36Sopenharmony_ci struct z_erofs_pcluster *pcl = be->pcl; 117562306a36Sopenharmony_ci struct z_erofs_bvec_iter biter; 117662306a36Sopenharmony_ci struct page *old_bvpage; 117762306a36Sopenharmony_ci int i; 117862306a36Sopenharmony_ci 117962306a36Sopenharmony_ci z_erofs_bvec_iter_begin(&biter, &pcl->bvset, Z_EROFS_INLINE_BVECS, 0); 118062306a36Sopenharmony_ci for (i = 0; i < pcl->vcnt; ++i) { 118162306a36Sopenharmony_ci struct z_erofs_bvec bvec; 118262306a36Sopenharmony_ci 118362306a36Sopenharmony_ci z_erofs_bvec_dequeue(&biter, &bvec, &old_bvpage); 118462306a36Sopenharmony_ci 118562306a36Sopenharmony_ci if (old_bvpage) 118662306a36Sopenharmony_ci z_erofs_put_shortlivedpage(be->pagepool, old_bvpage); 118762306a36Sopenharmony_ci 118862306a36Sopenharmony_ci DBG_BUGON(z_erofs_page_is_invalidated(bvec.page)); 118962306a36Sopenharmony_ci z_erofs_do_decompressed_bvec(be, &bvec); 119062306a36Sopenharmony_ci } 119162306a36Sopenharmony_ci 119262306a36Sopenharmony_ci old_bvpage = z_erofs_bvec_iter_end(&biter); 119362306a36Sopenharmony_ci if (old_bvpage) 119462306a36Sopenharmony_ci z_erofs_put_shortlivedpage(be->pagepool, old_bvpage); 119562306a36Sopenharmony_ci} 119662306a36Sopenharmony_ci 119762306a36Sopenharmony_cistatic int z_erofs_parse_in_bvecs(struct z_erofs_decompress_backend *be, 119862306a36Sopenharmony_ci bool *overlapped) 119962306a36Sopenharmony_ci{ 120062306a36Sopenharmony_ci struct z_erofs_pcluster *pcl = be->pcl; 120162306a36Sopenharmony_ci unsigned int pclusterpages = z_erofs_pclusterpages(pcl); 120262306a36Sopenharmony_ci int i, err = 0; 120362306a36Sopenharmony_ci 120462306a36Sopenharmony_ci *overlapped = false; 120562306a36Sopenharmony_ci for (i = 0; i < pclusterpages; ++i) { 120662306a36Sopenharmony_ci struct z_erofs_bvec *bvec = &pcl->compressed_bvecs[i]; 120762306a36Sopenharmony_ci struct page *page = bvec->page; 120862306a36Sopenharmony_ci 120962306a36Sopenharmony_ci /* compressed pages ought to be present before decompressing */ 121062306a36Sopenharmony_ci if (!page) { 121162306a36Sopenharmony_ci DBG_BUGON(1); 121262306a36Sopenharmony_ci continue; 121362306a36Sopenharmony_ci } 121462306a36Sopenharmony_ci be->compressed_pages[i] = page; 121562306a36Sopenharmony_ci 121662306a36Sopenharmony_ci if (z_erofs_is_inline_pcluster(pcl)) { 121762306a36Sopenharmony_ci if (!PageUptodate(page)) 121862306a36Sopenharmony_ci err = -EIO; 121962306a36Sopenharmony_ci continue; 122062306a36Sopenharmony_ci } 122162306a36Sopenharmony_ci 122262306a36Sopenharmony_ci DBG_BUGON(z_erofs_page_is_invalidated(page)); 122362306a36Sopenharmony_ci if (!z_erofs_is_shortlived_page(page)) { 122462306a36Sopenharmony_ci if (erofs_page_is_managed(EROFS_SB(be->sb), page)) { 122562306a36Sopenharmony_ci if (!PageUptodate(page)) 122662306a36Sopenharmony_ci err = -EIO; 122762306a36Sopenharmony_ci continue; 122862306a36Sopenharmony_ci } 122962306a36Sopenharmony_ci z_erofs_do_decompressed_bvec(be, bvec); 123062306a36Sopenharmony_ci *overlapped = true; 123162306a36Sopenharmony_ci } 123262306a36Sopenharmony_ci } 123362306a36Sopenharmony_ci 123462306a36Sopenharmony_ci if (err) 123562306a36Sopenharmony_ci return err; 123662306a36Sopenharmony_ci return 0; 123762306a36Sopenharmony_ci} 123862306a36Sopenharmony_ci 123962306a36Sopenharmony_cistatic int z_erofs_decompress_pcluster(struct z_erofs_decompress_backend *be, 124062306a36Sopenharmony_ci int err) 124162306a36Sopenharmony_ci{ 124262306a36Sopenharmony_ci struct erofs_sb_info *const sbi = EROFS_SB(be->sb); 124362306a36Sopenharmony_ci struct z_erofs_pcluster *pcl = be->pcl; 124462306a36Sopenharmony_ci unsigned int pclusterpages = z_erofs_pclusterpages(pcl); 124562306a36Sopenharmony_ci const struct z_erofs_decompressor *decompressor = 124662306a36Sopenharmony_ci &erofs_decompressors[pcl->algorithmformat]; 124762306a36Sopenharmony_ci unsigned int i, inputsize; 124862306a36Sopenharmony_ci int err2; 124962306a36Sopenharmony_ci struct page *page; 125062306a36Sopenharmony_ci bool overlapped; 125162306a36Sopenharmony_ci 125262306a36Sopenharmony_ci mutex_lock(&pcl->lock); 125362306a36Sopenharmony_ci be->nr_pages = PAGE_ALIGN(pcl->length + pcl->pageofs_out) >> PAGE_SHIFT; 125462306a36Sopenharmony_ci 125562306a36Sopenharmony_ci /* allocate (de)compressed page arrays if cannot be kept on stack */ 125662306a36Sopenharmony_ci be->decompressed_pages = NULL; 125762306a36Sopenharmony_ci be->compressed_pages = NULL; 125862306a36Sopenharmony_ci be->onstack_used = 0; 125962306a36Sopenharmony_ci if (be->nr_pages <= Z_EROFS_ONSTACK_PAGES) { 126062306a36Sopenharmony_ci be->decompressed_pages = be->onstack_pages; 126162306a36Sopenharmony_ci be->onstack_used = be->nr_pages; 126262306a36Sopenharmony_ci memset(be->decompressed_pages, 0, 126362306a36Sopenharmony_ci sizeof(struct page *) * be->nr_pages); 126462306a36Sopenharmony_ci } 126562306a36Sopenharmony_ci 126662306a36Sopenharmony_ci if (pclusterpages + be->onstack_used <= Z_EROFS_ONSTACK_PAGES) 126762306a36Sopenharmony_ci be->compressed_pages = be->onstack_pages + be->onstack_used; 126862306a36Sopenharmony_ci 126962306a36Sopenharmony_ci if (!be->decompressed_pages) 127062306a36Sopenharmony_ci be->decompressed_pages = 127162306a36Sopenharmony_ci kvcalloc(be->nr_pages, sizeof(struct page *), 127262306a36Sopenharmony_ci GFP_KERNEL | __GFP_NOFAIL); 127362306a36Sopenharmony_ci if (!be->compressed_pages) 127462306a36Sopenharmony_ci be->compressed_pages = 127562306a36Sopenharmony_ci kvcalloc(pclusterpages, sizeof(struct page *), 127662306a36Sopenharmony_ci GFP_KERNEL | __GFP_NOFAIL); 127762306a36Sopenharmony_ci 127862306a36Sopenharmony_ci z_erofs_parse_out_bvecs(be); 127962306a36Sopenharmony_ci err2 = z_erofs_parse_in_bvecs(be, &overlapped); 128062306a36Sopenharmony_ci if (err2) 128162306a36Sopenharmony_ci err = err2; 128262306a36Sopenharmony_ci if (err) 128362306a36Sopenharmony_ci goto out; 128462306a36Sopenharmony_ci 128562306a36Sopenharmony_ci if (z_erofs_is_inline_pcluster(pcl)) 128662306a36Sopenharmony_ci inputsize = pcl->tailpacking_size; 128762306a36Sopenharmony_ci else 128862306a36Sopenharmony_ci inputsize = pclusterpages * PAGE_SIZE; 128962306a36Sopenharmony_ci 129062306a36Sopenharmony_ci err = decompressor->decompress(&(struct z_erofs_decompress_req) { 129162306a36Sopenharmony_ci .sb = be->sb, 129262306a36Sopenharmony_ci .in = be->compressed_pages, 129362306a36Sopenharmony_ci .out = be->decompressed_pages, 129462306a36Sopenharmony_ci .pageofs_in = pcl->pageofs_in, 129562306a36Sopenharmony_ci .pageofs_out = pcl->pageofs_out, 129662306a36Sopenharmony_ci .inputsize = inputsize, 129762306a36Sopenharmony_ci .outputsize = pcl->length, 129862306a36Sopenharmony_ci .alg = pcl->algorithmformat, 129962306a36Sopenharmony_ci .inplace_io = overlapped, 130062306a36Sopenharmony_ci .partial_decoding = pcl->partial, 130162306a36Sopenharmony_ci .fillgaps = pcl->multibases, 130262306a36Sopenharmony_ci }, be->pagepool); 130362306a36Sopenharmony_ci 130462306a36Sopenharmony_ciout: 130562306a36Sopenharmony_ci /* must handle all compressed pages before actual file pages */ 130662306a36Sopenharmony_ci if (z_erofs_is_inline_pcluster(pcl)) { 130762306a36Sopenharmony_ci page = pcl->compressed_bvecs[0].page; 130862306a36Sopenharmony_ci WRITE_ONCE(pcl->compressed_bvecs[0].page, NULL); 130962306a36Sopenharmony_ci put_page(page); 131062306a36Sopenharmony_ci } else { 131162306a36Sopenharmony_ci for (i = 0; i < pclusterpages; ++i) { 131262306a36Sopenharmony_ci /* consider shortlived pages added when decompressing */ 131362306a36Sopenharmony_ci page = be->compressed_pages[i]; 131462306a36Sopenharmony_ci 131562306a36Sopenharmony_ci if (erofs_page_is_managed(sbi, page)) 131662306a36Sopenharmony_ci continue; 131762306a36Sopenharmony_ci (void)z_erofs_put_shortlivedpage(be->pagepool, page); 131862306a36Sopenharmony_ci WRITE_ONCE(pcl->compressed_bvecs[i].page, NULL); 131962306a36Sopenharmony_ci } 132062306a36Sopenharmony_ci } 132162306a36Sopenharmony_ci if (be->compressed_pages < be->onstack_pages || 132262306a36Sopenharmony_ci be->compressed_pages >= be->onstack_pages + Z_EROFS_ONSTACK_PAGES) 132362306a36Sopenharmony_ci kvfree(be->compressed_pages); 132462306a36Sopenharmony_ci z_erofs_fill_other_copies(be, err); 132562306a36Sopenharmony_ci 132662306a36Sopenharmony_ci for (i = 0; i < be->nr_pages; ++i) { 132762306a36Sopenharmony_ci page = be->decompressed_pages[i]; 132862306a36Sopenharmony_ci if (!page) 132962306a36Sopenharmony_ci continue; 133062306a36Sopenharmony_ci 133162306a36Sopenharmony_ci DBG_BUGON(z_erofs_page_is_invalidated(page)); 133262306a36Sopenharmony_ci 133362306a36Sopenharmony_ci /* recycle all individual short-lived pages */ 133462306a36Sopenharmony_ci if (z_erofs_put_shortlivedpage(be->pagepool, page)) 133562306a36Sopenharmony_ci continue; 133662306a36Sopenharmony_ci z_erofs_onlinepage_endio(page, err); 133762306a36Sopenharmony_ci } 133862306a36Sopenharmony_ci 133962306a36Sopenharmony_ci if (be->decompressed_pages != be->onstack_pages) 134062306a36Sopenharmony_ci kvfree(be->decompressed_pages); 134162306a36Sopenharmony_ci 134262306a36Sopenharmony_ci pcl->length = 0; 134362306a36Sopenharmony_ci pcl->partial = true; 134462306a36Sopenharmony_ci pcl->multibases = false; 134562306a36Sopenharmony_ci pcl->bvset.nextpage = NULL; 134662306a36Sopenharmony_ci pcl->vcnt = 0; 134762306a36Sopenharmony_ci 134862306a36Sopenharmony_ci /* pcluster lock MUST be taken before the following line */ 134962306a36Sopenharmony_ci WRITE_ONCE(pcl->next, Z_EROFS_PCLUSTER_NIL); 135062306a36Sopenharmony_ci mutex_unlock(&pcl->lock); 135162306a36Sopenharmony_ci return err; 135262306a36Sopenharmony_ci} 135362306a36Sopenharmony_ci 135462306a36Sopenharmony_cistatic void z_erofs_decompress_queue(const struct z_erofs_decompressqueue *io, 135562306a36Sopenharmony_ci struct page **pagepool) 135662306a36Sopenharmony_ci{ 135762306a36Sopenharmony_ci struct z_erofs_decompress_backend be = { 135862306a36Sopenharmony_ci .sb = io->sb, 135962306a36Sopenharmony_ci .pagepool = pagepool, 136062306a36Sopenharmony_ci .decompressed_secondary_bvecs = 136162306a36Sopenharmony_ci LIST_HEAD_INIT(be.decompressed_secondary_bvecs), 136262306a36Sopenharmony_ci }; 136362306a36Sopenharmony_ci z_erofs_next_pcluster_t owned = io->head; 136462306a36Sopenharmony_ci 136562306a36Sopenharmony_ci while (owned != Z_EROFS_PCLUSTER_TAIL) { 136662306a36Sopenharmony_ci DBG_BUGON(owned == Z_EROFS_PCLUSTER_NIL); 136762306a36Sopenharmony_ci 136862306a36Sopenharmony_ci be.pcl = container_of(owned, struct z_erofs_pcluster, next); 136962306a36Sopenharmony_ci owned = READ_ONCE(be.pcl->next); 137062306a36Sopenharmony_ci 137162306a36Sopenharmony_ci z_erofs_decompress_pcluster(&be, io->eio ? -EIO : 0); 137262306a36Sopenharmony_ci if (z_erofs_is_inline_pcluster(be.pcl)) 137362306a36Sopenharmony_ci z_erofs_free_pcluster(be.pcl); 137462306a36Sopenharmony_ci else 137562306a36Sopenharmony_ci erofs_workgroup_put(&be.pcl->obj); 137662306a36Sopenharmony_ci } 137762306a36Sopenharmony_ci} 137862306a36Sopenharmony_ci 137962306a36Sopenharmony_cistatic void z_erofs_decompressqueue_work(struct work_struct *work) 138062306a36Sopenharmony_ci{ 138162306a36Sopenharmony_ci struct z_erofs_decompressqueue *bgq = 138262306a36Sopenharmony_ci container_of(work, struct z_erofs_decompressqueue, u.work); 138362306a36Sopenharmony_ci struct page *pagepool = NULL; 138462306a36Sopenharmony_ci 138562306a36Sopenharmony_ci DBG_BUGON(bgq->head == Z_EROFS_PCLUSTER_TAIL); 138662306a36Sopenharmony_ci z_erofs_decompress_queue(bgq, &pagepool); 138762306a36Sopenharmony_ci erofs_release_pages(&pagepool); 138862306a36Sopenharmony_ci kvfree(bgq); 138962306a36Sopenharmony_ci} 139062306a36Sopenharmony_ci 139162306a36Sopenharmony_ci#ifdef CONFIG_EROFS_FS_PCPU_KTHREAD 139262306a36Sopenharmony_cistatic void z_erofs_decompressqueue_kthread_work(struct kthread_work *work) 139362306a36Sopenharmony_ci{ 139462306a36Sopenharmony_ci z_erofs_decompressqueue_work((struct work_struct *)work); 139562306a36Sopenharmony_ci} 139662306a36Sopenharmony_ci#endif 139762306a36Sopenharmony_ci 139862306a36Sopenharmony_cistatic void z_erofs_decompress_kickoff(struct z_erofs_decompressqueue *io, 139962306a36Sopenharmony_ci int bios) 140062306a36Sopenharmony_ci{ 140162306a36Sopenharmony_ci struct erofs_sb_info *const sbi = EROFS_SB(io->sb); 140262306a36Sopenharmony_ci 140362306a36Sopenharmony_ci /* wake up the caller thread for sync decompression */ 140462306a36Sopenharmony_ci if (io->sync) { 140562306a36Sopenharmony_ci if (!atomic_add_return(bios, &io->pending_bios)) 140662306a36Sopenharmony_ci complete(&io->u.done); 140762306a36Sopenharmony_ci return; 140862306a36Sopenharmony_ci } 140962306a36Sopenharmony_ci 141062306a36Sopenharmony_ci if (atomic_add_return(bios, &io->pending_bios)) 141162306a36Sopenharmony_ci return; 141262306a36Sopenharmony_ci /* Use (kthread_)work and sync decompression for atomic contexts only */ 141362306a36Sopenharmony_ci if (!in_task() || irqs_disabled() || rcu_read_lock_any_held()) { 141462306a36Sopenharmony_ci#ifdef CONFIG_EROFS_FS_PCPU_KTHREAD 141562306a36Sopenharmony_ci struct kthread_worker *worker; 141662306a36Sopenharmony_ci 141762306a36Sopenharmony_ci rcu_read_lock(); 141862306a36Sopenharmony_ci worker = rcu_dereference( 141962306a36Sopenharmony_ci z_erofs_pcpu_workers[raw_smp_processor_id()]); 142062306a36Sopenharmony_ci if (!worker) { 142162306a36Sopenharmony_ci INIT_WORK(&io->u.work, z_erofs_decompressqueue_work); 142262306a36Sopenharmony_ci queue_work(z_erofs_workqueue, &io->u.work); 142362306a36Sopenharmony_ci } else { 142462306a36Sopenharmony_ci kthread_queue_work(worker, &io->u.kthread_work); 142562306a36Sopenharmony_ci } 142662306a36Sopenharmony_ci rcu_read_unlock(); 142762306a36Sopenharmony_ci#else 142862306a36Sopenharmony_ci queue_work(z_erofs_workqueue, &io->u.work); 142962306a36Sopenharmony_ci#endif 143062306a36Sopenharmony_ci /* enable sync decompression for readahead */ 143162306a36Sopenharmony_ci if (sbi->opt.sync_decompress == EROFS_SYNC_DECOMPRESS_AUTO) 143262306a36Sopenharmony_ci sbi->opt.sync_decompress = EROFS_SYNC_DECOMPRESS_FORCE_ON; 143362306a36Sopenharmony_ci return; 143462306a36Sopenharmony_ci } 143562306a36Sopenharmony_ci z_erofs_decompressqueue_work(&io->u.work); 143662306a36Sopenharmony_ci} 143762306a36Sopenharmony_ci 143862306a36Sopenharmony_cistatic struct page *pickup_page_for_submission(struct z_erofs_pcluster *pcl, 143962306a36Sopenharmony_ci unsigned int nr, 144062306a36Sopenharmony_ci struct page **pagepool, 144162306a36Sopenharmony_ci struct address_space *mc) 144262306a36Sopenharmony_ci{ 144362306a36Sopenharmony_ci const pgoff_t index = pcl->obj.index; 144462306a36Sopenharmony_ci gfp_t gfp = mapping_gfp_mask(mc); 144562306a36Sopenharmony_ci bool tocache = false; 144662306a36Sopenharmony_ci 144762306a36Sopenharmony_ci struct address_space *mapping; 144862306a36Sopenharmony_ci struct page *oldpage, *page; 144962306a36Sopenharmony_ci int justfound; 145062306a36Sopenharmony_ci 145162306a36Sopenharmony_cirepeat: 145262306a36Sopenharmony_ci page = READ_ONCE(pcl->compressed_bvecs[nr].page); 145362306a36Sopenharmony_ci oldpage = page; 145462306a36Sopenharmony_ci 145562306a36Sopenharmony_ci if (!page) 145662306a36Sopenharmony_ci goto out_allocpage; 145762306a36Sopenharmony_ci 145862306a36Sopenharmony_ci justfound = (unsigned long)page & 1UL; 145962306a36Sopenharmony_ci page = (struct page *)((unsigned long)page & ~1UL); 146062306a36Sopenharmony_ci 146162306a36Sopenharmony_ci /* 146262306a36Sopenharmony_ci * preallocated cached pages, which is used to avoid direct reclaim 146362306a36Sopenharmony_ci * otherwise, it will go inplace I/O path instead. 146462306a36Sopenharmony_ci */ 146562306a36Sopenharmony_ci if (page->private == Z_EROFS_PREALLOCATED_PAGE) { 146662306a36Sopenharmony_ci WRITE_ONCE(pcl->compressed_bvecs[nr].page, page); 146762306a36Sopenharmony_ci set_page_private(page, 0); 146862306a36Sopenharmony_ci tocache = true; 146962306a36Sopenharmony_ci goto out_tocache; 147062306a36Sopenharmony_ci } 147162306a36Sopenharmony_ci mapping = READ_ONCE(page->mapping); 147262306a36Sopenharmony_ci 147362306a36Sopenharmony_ci /* 147462306a36Sopenharmony_ci * file-backed online pages in plcuster are all locked steady, 147562306a36Sopenharmony_ci * therefore it is impossible for `mapping' to be NULL. 147662306a36Sopenharmony_ci */ 147762306a36Sopenharmony_ci if (mapping && mapping != mc) 147862306a36Sopenharmony_ci /* ought to be unmanaged pages */ 147962306a36Sopenharmony_ci goto out; 148062306a36Sopenharmony_ci 148162306a36Sopenharmony_ci /* directly return for shortlived page as well */ 148262306a36Sopenharmony_ci if (z_erofs_is_shortlived_page(page)) 148362306a36Sopenharmony_ci goto out; 148462306a36Sopenharmony_ci 148562306a36Sopenharmony_ci lock_page(page); 148662306a36Sopenharmony_ci 148762306a36Sopenharmony_ci /* only true if page reclaim goes wrong, should never happen */ 148862306a36Sopenharmony_ci DBG_BUGON(justfound && PagePrivate(page)); 148962306a36Sopenharmony_ci 149062306a36Sopenharmony_ci /* the page is still in manage cache */ 149162306a36Sopenharmony_ci if (page->mapping == mc) { 149262306a36Sopenharmony_ci WRITE_ONCE(pcl->compressed_bvecs[nr].page, page); 149362306a36Sopenharmony_ci 149462306a36Sopenharmony_ci if (!PagePrivate(page)) { 149562306a36Sopenharmony_ci /* 149662306a36Sopenharmony_ci * impossible to be !PagePrivate(page) for 149762306a36Sopenharmony_ci * the current restriction as well if 149862306a36Sopenharmony_ci * the page is already in compressed_bvecs[]. 149962306a36Sopenharmony_ci */ 150062306a36Sopenharmony_ci DBG_BUGON(!justfound); 150162306a36Sopenharmony_ci 150262306a36Sopenharmony_ci justfound = 0; 150362306a36Sopenharmony_ci set_page_private(page, (unsigned long)pcl); 150462306a36Sopenharmony_ci SetPagePrivate(page); 150562306a36Sopenharmony_ci } 150662306a36Sopenharmony_ci 150762306a36Sopenharmony_ci /* no need to submit io if it is already up-to-date */ 150862306a36Sopenharmony_ci if (PageUptodate(page)) { 150962306a36Sopenharmony_ci unlock_page(page); 151062306a36Sopenharmony_ci page = NULL; 151162306a36Sopenharmony_ci } 151262306a36Sopenharmony_ci goto out; 151362306a36Sopenharmony_ci } 151462306a36Sopenharmony_ci 151562306a36Sopenharmony_ci /* 151662306a36Sopenharmony_ci * the managed page has been truncated, it's unsafe to 151762306a36Sopenharmony_ci * reuse this one, let's allocate a new cache-managed page. 151862306a36Sopenharmony_ci */ 151962306a36Sopenharmony_ci DBG_BUGON(page->mapping); 152062306a36Sopenharmony_ci DBG_BUGON(!justfound); 152162306a36Sopenharmony_ci 152262306a36Sopenharmony_ci tocache = true; 152362306a36Sopenharmony_ci unlock_page(page); 152462306a36Sopenharmony_ci put_page(page); 152562306a36Sopenharmony_ciout_allocpage: 152662306a36Sopenharmony_ci page = erofs_allocpage(pagepool, gfp | __GFP_NOFAIL); 152762306a36Sopenharmony_ci if (oldpage != cmpxchg(&pcl->compressed_bvecs[nr].page, 152862306a36Sopenharmony_ci oldpage, page)) { 152962306a36Sopenharmony_ci erofs_pagepool_add(pagepool, page); 153062306a36Sopenharmony_ci cond_resched(); 153162306a36Sopenharmony_ci goto repeat; 153262306a36Sopenharmony_ci } 153362306a36Sopenharmony_ciout_tocache: 153462306a36Sopenharmony_ci if (!tocache || add_to_page_cache_lru(page, mc, index + nr, gfp)) { 153562306a36Sopenharmony_ci /* turn into temporary page if fails (1 ref) */ 153662306a36Sopenharmony_ci set_page_private(page, Z_EROFS_SHORTLIVED_PAGE); 153762306a36Sopenharmony_ci goto out; 153862306a36Sopenharmony_ci } 153962306a36Sopenharmony_ci attach_page_private(page, pcl); 154062306a36Sopenharmony_ci /* drop a refcount added by allocpage (then we have 2 refs here) */ 154162306a36Sopenharmony_ci put_page(page); 154262306a36Sopenharmony_ci 154362306a36Sopenharmony_ciout: /* the only exit (for tracing and debugging) */ 154462306a36Sopenharmony_ci return page; 154562306a36Sopenharmony_ci} 154662306a36Sopenharmony_ci 154762306a36Sopenharmony_cistatic struct z_erofs_decompressqueue *jobqueue_init(struct super_block *sb, 154862306a36Sopenharmony_ci struct z_erofs_decompressqueue *fgq, bool *fg) 154962306a36Sopenharmony_ci{ 155062306a36Sopenharmony_ci struct z_erofs_decompressqueue *q; 155162306a36Sopenharmony_ci 155262306a36Sopenharmony_ci if (fg && !*fg) { 155362306a36Sopenharmony_ci q = kvzalloc(sizeof(*q), GFP_KERNEL | __GFP_NOWARN); 155462306a36Sopenharmony_ci if (!q) { 155562306a36Sopenharmony_ci *fg = true; 155662306a36Sopenharmony_ci goto fg_out; 155762306a36Sopenharmony_ci } 155862306a36Sopenharmony_ci#ifdef CONFIG_EROFS_FS_PCPU_KTHREAD 155962306a36Sopenharmony_ci kthread_init_work(&q->u.kthread_work, 156062306a36Sopenharmony_ci z_erofs_decompressqueue_kthread_work); 156162306a36Sopenharmony_ci#else 156262306a36Sopenharmony_ci INIT_WORK(&q->u.work, z_erofs_decompressqueue_work); 156362306a36Sopenharmony_ci#endif 156462306a36Sopenharmony_ci } else { 156562306a36Sopenharmony_cifg_out: 156662306a36Sopenharmony_ci q = fgq; 156762306a36Sopenharmony_ci init_completion(&fgq->u.done); 156862306a36Sopenharmony_ci atomic_set(&fgq->pending_bios, 0); 156962306a36Sopenharmony_ci q->eio = false; 157062306a36Sopenharmony_ci q->sync = true; 157162306a36Sopenharmony_ci } 157262306a36Sopenharmony_ci q->sb = sb; 157362306a36Sopenharmony_ci q->head = Z_EROFS_PCLUSTER_TAIL; 157462306a36Sopenharmony_ci return q; 157562306a36Sopenharmony_ci} 157662306a36Sopenharmony_ci 157762306a36Sopenharmony_ci/* define decompression jobqueue types */ 157862306a36Sopenharmony_cienum { 157962306a36Sopenharmony_ci JQ_BYPASS, 158062306a36Sopenharmony_ci JQ_SUBMIT, 158162306a36Sopenharmony_ci NR_JOBQUEUES, 158262306a36Sopenharmony_ci}; 158362306a36Sopenharmony_ci 158462306a36Sopenharmony_cistatic void move_to_bypass_jobqueue(struct z_erofs_pcluster *pcl, 158562306a36Sopenharmony_ci z_erofs_next_pcluster_t qtail[], 158662306a36Sopenharmony_ci z_erofs_next_pcluster_t owned_head) 158762306a36Sopenharmony_ci{ 158862306a36Sopenharmony_ci z_erofs_next_pcluster_t *const submit_qtail = qtail[JQ_SUBMIT]; 158962306a36Sopenharmony_ci z_erofs_next_pcluster_t *const bypass_qtail = qtail[JQ_BYPASS]; 159062306a36Sopenharmony_ci 159162306a36Sopenharmony_ci WRITE_ONCE(pcl->next, Z_EROFS_PCLUSTER_TAIL); 159262306a36Sopenharmony_ci 159362306a36Sopenharmony_ci WRITE_ONCE(*submit_qtail, owned_head); 159462306a36Sopenharmony_ci WRITE_ONCE(*bypass_qtail, &pcl->next); 159562306a36Sopenharmony_ci 159662306a36Sopenharmony_ci qtail[JQ_BYPASS] = &pcl->next; 159762306a36Sopenharmony_ci} 159862306a36Sopenharmony_ci 159962306a36Sopenharmony_cistatic void z_erofs_decompressqueue_endio(struct bio *bio) 160062306a36Sopenharmony_ci{ 160162306a36Sopenharmony_ci struct z_erofs_decompressqueue *q = bio->bi_private; 160262306a36Sopenharmony_ci blk_status_t err = bio->bi_status; 160362306a36Sopenharmony_ci struct bio_vec *bvec; 160462306a36Sopenharmony_ci struct bvec_iter_all iter_all; 160562306a36Sopenharmony_ci 160662306a36Sopenharmony_ci bio_for_each_segment_all(bvec, bio, iter_all) { 160762306a36Sopenharmony_ci struct page *page = bvec->bv_page; 160862306a36Sopenharmony_ci 160962306a36Sopenharmony_ci DBG_BUGON(PageUptodate(page)); 161062306a36Sopenharmony_ci DBG_BUGON(z_erofs_page_is_invalidated(page)); 161162306a36Sopenharmony_ci 161262306a36Sopenharmony_ci if (erofs_page_is_managed(EROFS_SB(q->sb), page)) { 161362306a36Sopenharmony_ci if (!err) 161462306a36Sopenharmony_ci SetPageUptodate(page); 161562306a36Sopenharmony_ci unlock_page(page); 161662306a36Sopenharmony_ci } 161762306a36Sopenharmony_ci } 161862306a36Sopenharmony_ci if (err) 161962306a36Sopenharmony_ci q->eio = true; 162062306a36Sopenharmony_ci z_erofs_decompress_kickoff(q, -1); 162162306a36Sopenharmony_ci bio_put(bio); 162262306a36Sopenharmony_ci} 162362306a36Sopenharmony_ci 162462306a36Sopenharmony_cistatic void z_erofs_submit_queue(struct z_erofs_decompress_frontend *f, 162562306a36Sopenharmony_ci struct z_erofs_decompressqueue *fgq, 162662306a36Sopenharmony_ci bool *force_fg, bool readahead) 162762306a36Sopenharmony_ci{ 162862306a36Sopenharmony_ci struct super_block *sb = f->inode->i_sb; 162962306a36Sopenharmony_ci struct address_space *mc = MNGD_MAPPING(EROFS_SB(sb)); 163062306a36Sopenharmony_ci z_erofs_next_pcluster_t qtail[NR_JOBQUEUES]; 163162306a36Sopenharmony_ci struct z_erofs_decompressqueue *q[NR_JOBQUEUES]; 163262306a36Sopenharmony_ci z_erofs_next_pcluster_t owned_head = f->owned_head; 163362306a36Sopenharmony_ci /* bio is NULL initially, so no need to initialize last_{index,bdev} */ 163462306a36Sopenharmony_ci pgoff_t last_index; 163562306a36Sopenharmony_ci struct block_device *last_bdev; 163662306a36Sopenharmony_ci unsigned int nr_bios = 0; 163762306a36Sopenharmony_ci struct bio *bio = NULL; 163862306a36Sopenharmony_ci unsigned long pflags; 163962306a36Sopenharmony_ci int memstall = 0; 164062306a36Sopenharmony_ci 164162306a36Sopenharmony_ci /* 164262306a36Sopenharmony_ci * if managed cache is enabled, bypass jobqueue is needed, 164362306a36Sopenharmony_ci * no need to read from device for all pclusters in this queue. 164462306a36Sopenharmony_ci */ 164562306a36Sopenharmony_ci q[JQ_BYPASS] = jobqueue_init(sb, fgq + JQ_BYPASS, NULL); 164662306a36Sopenharmony_ci q[JQ_SUBMIT] = jobqueue_init(sb, fgq + JQ_SUBMIT, force_fg); 164762306a36Sopenharmony_ci 164862306a36Sopenharmony_ci qtail[JQ_BYPASS] = &q[JQ_BYPASS]->head; 164962306a36Sopenharmony_ci qtail[JQ_SUBMIT] = &q[JQ_SUBMIT]->head; 165062306a36Sopenharmony_ci 165162306a36Sopenharmony_ci /* by default, all need io submission */ 165262306a36Sopenharmony_ci q[JQ_SUBMIT]->head = owned_head; 165362306a36Sopenharmony_ci 165462306a36Sopenharmony_ci do { 165562306a36Sopenharmony_ci struct erofs_map_dev mdev; 165662306a36Sopenharmony_ci struct z_erofs_pcluster *pcl; 165762306a36Sopenharmony_ci pgoff_t cur, end; 165862306a36Sopenharmony_ci unsigned int i = 0; 165962306a36Sopenharmony_ci bool bypass = true; 166062306a36Sopenharmony_ci 166162306a36Sopenharmony_ci DBG_BUGON(owned_head == Z_EROFS_PCLUSTER_NIL); 166262306a36Sopenharmony_ci pcl = container_of(owned_head, struct z_erofs_pcluster, next); 166362306a36Sopenharmony_ci owned_head = READ_ONCE(pcl->next); 166462306a36Sopenharmony_ci 166562306a36Sopenharmony_ci if (z_erofs_is_inline_pcluster(pcl)) { 166662306a36Sopenharmony_ci move_to_bypass_jobqueue(pcl, qtail, owned_head); 166762306a36Sopenharmony_ci continue; 166862306a36Sopenharmony_ci } 166962306a36Sopenharmony_ci 167062306a36Sopenharmony_ci /* no device id here, thus it will always succeed */ 167162306a36Sopenharmony_ci mdev = (struct erofs_map_dev) { 167262306a36Sopenharmony_ci .m_pa = erofs_pos(sb, pcl->obj.index), 167362306a36Sopenharmony_ci }; 167462306a36Sopenharmony_ci (void)erofs_map_dev(sb, &mdev); 167562306a36Sopenharmony_ci 167662306a36Sopenharmony_ci cur = erofs_blknr(sb, mdev.m_pa); 167762306a36Sopenharmony_ci end = cur + pcl->pclusterpages; 167862306a36Sopenharmony_ci 167962306a36Sopenharmony_ci do { 168062306a36Sopenharmony_ci struct page *page; 168162306a36Sopenharmony_ci 168262306a36Sopenharmony_ci page = pickup_page_for_submission(pcl, i++, 168362306a36Sopenharmony_ci &f->pagepool, mc); 168462306a36Sopenharmony_ci if (!page) 168562306a36Sopenharmony_ci continue; 168662306a36Sopenharmony_ci 168762306a36Sopenharmony_ci if (bio && (cur != last_index + 1 || 168862306a36Sopenharmony_ci last_bdev != mdev.m_bdev)) { 168962306a36Sopenharmony_cisubmit_bio_retry: 169062306a36Sopenharmony_ci submit_bio(bio); 169162306a36Sopenharmony_ci if (memstall) { 169262306a36Sopenharmony_ci psi_memstall_leave(&pflags); 169362306a36Sopenharmony_ci memstall = 0; 169462306a36Sopenharmony_ci } 169562306a36Sopenharmony_ci bio = NULL; 169662306a36Sopenharmony_ci } 169762306a36Sopenharmony_ci 169862306a36Sopenharmony_ci if (unlikely(PageWorkingset(page)) && !memstall) { 169962306a36Sopenharmony_ci psi_memstall_enter(&pflags); 170062306a36Sopenharmony_ci memstall = 1; 170162306a36Sopenharmony_ci } 170262306a36Sopenharmony_ci 170362306a36Sopenharmony_ci if (!bio) { 170462306a36Sopenharmony_ci bio = bio_alloc(mdev.m_bdev, BIO_MAX_VECS, 170562306a36Sopenharmony_ci REQ_OP_READ, GFP_NOIO); 170662306a36Sopenharmony_ci bio->bi_end_io = z_erofs_decompressqueue_endio; 170762306a36Sopenharmony_ci 170862306a36Sopenharmony_ci last_bdev = mdev.m_bdev; 170962306a36Sopenharmony_ci bio->bi_iter.bi_sector = (sector_t)cur << 171062306a36Sopenharmony_ci (sb->s_blocksize_bits - 9); 171162306a36Sopenharmony_ci bio->bi_private = q[JQ_SUBMIT]; 171262306a36Sopenharmony_ci if (readahead) 171362306a36Sopenharmony_ci bio->bi_opf |= REQ_RAHEAD; 171462306a36Sopenharmony_ci ++nr_bios; 171562306a36Sopenharmony_ci } 171662306a36Sopenharmony_ci 171762306a36Sopenharmony_ci if (bio_add_page(bio, page, PAGE_SIZE, 0) < PAGE_SIZE) 171862306a36Sopenharmony_ci goto submit_bio_retry; 171962306a36Sopenharmony_ci 172062306a36Sopenharmony_ci last_index = cur; 172162306a36Sopenharmony_ci bypass = false; 172262306a36Sopenharmony_ci } while (++cur < end); 172362306a36Sopenharmony_ci 172462306a36Sopenharmony_ci if (!bypass) 172562306a36Sopenharmony_ci qtail[JQ_SUBMIT] = &pcl->next; 172662306a36Sopenharmony_ci else 172762306a36Sopenharmony_ci move_to_bypass_jobqueue(pcl, qtail, owned_head); 172862306a36Sopenharmony_ci } while (owned_head != Z_EROFS_PCLUSTER_TAIL); 172962306a36Sopenharmony_ci 173062306a36Sopenharmony_ci if (bio) { 173162306a36Sopenharmony_ci submit_bio(bio); 173262306a36Sopenharmony_ci if (memstall) 173362306a36Sopenharmony_ci psi_memstall_leave(&pflags); 173462306a36Sopenharmony_ci } 173562306a36Sopenharmony_ci 173662306a36Sopenharmony_ci /* 173762306a36Sopenharmony_ci * although background is preferred, no one is pending for submission. 173862306a36Sopenharmony_ci * don't issue decompression but drop it directly instead. 173962306a36Sopenharmony_ci */ 174062306a36Sopenharmony_ci if (!*force_fg && !nr_bios) { 174162306a36Sopenharmony_ci kvfree(q[JQ_SUBMIT]); 174262306a36Sopenharmony_ci return; 174362306a36Sopenharmony_ci } 174462306a36Sopenharmony_ci z_erofs_decompress_kickoff(q[JQ_SUBMIT], nr_bios); 174562306a36Sopenharmony_ci} 174662306a36Sopenharmony_ci 174762306a36Sopenharmony_cistatic void z_erofs_runqueue(struct z_erofs_decompress_frontend *f, 174862306a36Sopenharmony_ci bool force_fg, bool ra) 174962306a36Sopenharmony_ci{ 175062306a36Sopenharmony_ci struct z_erofs_decompressqueue io[NR_JOBQUEUES]; 175162306a36Sopenharmony_ci 175262306a36Sopenharmony_ci if (f->owned_head == Z_EROFS_PCLUSTER_TAIL) 175362306a36Sopenharmony_ci return; 175462306a36Sopenharmony_ci z_erofs_submit_queue(f, io, &force_fg, ra); 175562306a36Sopenharmony_ci 175662306a36Sopenharmony_ci /* handle bypass queue (no i/o pclusters) immediately */ 175762306a36Sopenharmony_ci z_erofs_decompress_queue(&io[JQ_BYPASS], &f->pagepool); 175862306a36Sopenharmony_ci 175962306a36Sopenharmony_ci if (!force_fg) 176062306a36Sopenharmony_ci return; 176162306a36Sopenharmony_ci 176262306a36Sopenharmony_ci /* wait until all bios are completed */ 176362306a36Sopenharmony_ci wait_for_completion_io(&io[JQ_SUBMIT].u.done); 176462306a36Sopenharmony_ci 176562306a36Sopenharmony_ci /* handle synchronous decompress queue in the caller context */ 176662306a36Sopenharmony_ci z_erofs_decompress_queue(&io[JQ_SUBMIT], &f->pagepool); 176762306a36Sopenharmony_ci} 176862306a36Sopenharmony_ci 176962306a36Sopenharmony_ci/* 177062306a36Sopenharmony_ci * Since partial uptodate is still unimplemented for now, we have to use 177162306a36Sopenharmony_ci * approximate readmore strategies as a start. 177262306a36Sopenharmony_ci */ 177362306a36Sopenharmony_cistatic void z_erofs_pcluster_readmore(struct z_erofs_decompress_frontend *f, 177462306a36Sopenharmony_ci struct readahead_control *rac, bool backmost) 177562306a36Sopenharmony_ci{ 177662306a36Sopenharmony_ci struct inode *inode = f->inode; 177762306a36Sopenharmony_ci struct erofs_map_blocks *map = &f->map; 177862306a36Sopenharmony_ci erofs_off_t cur, end, headoffset = f->headoffset; 177962306a36Sopenharmony_ci int err; 178062306a36Sopenharmony_ci 178162306a36Sopenharmony_ci if (backmost) { 178262306a36Sopenharmony_ci if (rac) 178362306a36Sopenharmony_ci end = headoffset + readahead_length(rac) - 1; 178462306a36Sopenharmony_ci else 178562306a36Sopenharmony_ci end = headoffset + PAGE_SIZE - 1; 178662306a36Sopenharmony_ci map->m_la = end; 178762306a36Sopenharmony_ci err = z_erofs_map_blocks_iter(inode, map, 178862306a36Sopenharmony_ci EROFS_GET_BLOCKS_READMORE); 178962306a36Sopenharmony_ci if (err) 179062306a36Sopenharmony_ci return; 179162306a36Sopenharmony_ci 179262306a36Sopenharmony_ci /* expand ra for the trailing edge if readahead */ 179362306a36Sopenharmony_ci if (rac) { 179462306a36Sopenharmony_ci cur = round_up(map->m_la + map->m_llen, PAGE_SIZE); 179562306a36Sopenharmony_ci readahead_expand(rac, headoffset, cur - headoffset); 179662306a36Sopenharmony_ci return; 179762306a36Sopenharmony_ci } 179862306a36Sopenharmony_ci end = round_up(end, PAGE_SIZE); 179962306a36Sopenharmony_ci } else { 180062306a36Sopenharmony_ci end = round_up(map->m_la, PAGE_SIZE); 180162306a36Sopenharmony_ci 180262306a36Sopenharmony_ci if (!map->m_llen) 180362306a36Sopenharmony_ci return; 180462306a36Sopenharmony_ci } 180562306a36Sopenharmony_ci 180662306a36Sopenharmony_ci cur = map->m_la + map->m_llen - 1; 180762306a36Sopenharmony_ci while ((cur >= end) && (cur < i_size_read(inode))) { 180862306a36Sopenharmony_ci pgoff_t index = cur >> PAGE_SHIFT; 180962306a36Sopenharmony_ci struct page *page; 181062306a36Sopenharmony_ci 181162306a36Sopenharmony_ci page = erofs_grab_cache_page_nowait(inode->i_mapping, index); 181262306a36Sopenharmony_ci if (page) { 181362306a36Sopenharmony_ci if (PageUptodate(page)) 181462306a36Sopenharmony_ci unlock_page(page); 181562306a36Sopenharmony_ci else 181662306a36Sopenharmony_ci (void)z_erofs_do_read_page(f, page); 181762306a36Sopenharmony_ci put_page(page); 181862306a36Sopenharmony_ci } 181962306a36Sopenharmony_ci 182062306a36Sopenharmony_ci if (cur < PAGE_SIZE) 182162306a36Sopenharmony_ci break; 182262306a36Sopenharmony_ci cur = (index << PAGE_SHIFT) - 1; 182362306a36Sopenharmony_ci } 182462306a36Sopenharmony_ci} 182562306a36Sopenharmony_ci 182662306a36Sopenharmony_cistatic int z_erofs_read_folio(struct file *file, struct folio *folio) 182762306a36Sopenharmony_ci{ 182862306a36Sopenharmony_ci struct inode *const inode = folio->mapping->host; 182962306a36Sopenharmony_ci struct erofs_sb_info *const sbi = EROFS_I_SB(inode); 183062306a36Sopenharmony_ci struct z_erofs_decompress_frontend f = DECOMPRESS_FRONTEND_INIT(inode); 183162306a36Sopenharmony_ci int err; 183262306a36Sopenharmony_ci 183362306a36Sopenharmony_ci trace_erofs_read_folio(folio, false); 183462306a36Sopenharmony_ci f.headoffset = (erofs_off_t)folio->index << PAGE_SHIFT; 183562306a36Sopenharmony_ci 183662306a36Sopenharmony_ci z_erofs_pcluster_readmore(&f, NULL, true); 183762306a36Sopenharmony_ci err = z_erofs_do_read_page(&f, &folio->page); 183862306a36Sopenharmony_ci z_erofs_pcluster_readmore(&f, NULL, false); 183962306a36Sopenharmony_ci z_erofs_pcluster_end(&f); 184062306a36Sopenharmony_ci 184162306a36Sopenharmony_ci /* if some compressed cluster ready, need submit them anyway */ 184262306a36Sopenharmony_ci z_erofs_runqueue(&f, z_erofs_is_sync_decompress(sbi, 0), false); 184362306a36Sopenharmony_ci 184462306a36Sopenharmony_ci if (err && err != -EINTR) 184562306a36Sopenharmony_ci erofs_err(inode->i_sb, "read error %d @ %lu of nid %llu", 184662306a36Sopenharmony_ci err, folio->index, EROFS_I(inode)->nid); 184762306a36Sopenharmony_ci 184862306a36Sopenharmony_ci erofs_put_metabuf(&f.map.buf); 184962306a36Sopenharmony_ci erofs_release_pages(&f.pagepool); 185062306a36Sopenharmony_ci return err; 185162306a36Sopenharmony_ci} 185262306a36Sopenharmony_ci 185362306a36Sopenharmony_cistatic void z_erofs_readahead(struct readahead_control *rac) 185462306a36Sopenharmony_ci{ 185562306a36Sopenharmony_ci struct inode *const inode = rac->mapping->host; 185662306a36Sopenharmony_ci struct erofs_sb_info *const sbi = EROFS_I_SB(inode); 185762306a36Sopenharmony_ci struct z_erofs_decompress_frontend f = DECOMPRESS_FRONTEND_INIT(inode); 185862306a36Sopenharmony_ci struct folio *head = NULL, *folio; 185962306a36Sopenharmony_ci unsigned int nr_folios; 186062306a36Sopenharmony_ci int err; 186162306a36Sopenharmony_ci 186262306a36Sopenharmony_ci f.headoffset = readahead_pos(rac); 186362306a36Sopenharmony_ci 186462306a36Sopenharmony_ci z_erofs_pcluster_readmore(&f, rac, true); 186562306a36Sopenharmony_ci nr_folios = readahead_count(rac); 186662306a36Sopenharmony_ci trace_erofs_readpages(inode, readahead_index(rac), nr_folios, false); 186762306a36Sopenharmony_ci 186862306a36Sopenharmony_ci while ((folio = readahead_folio(rac))) { 186962306a36Sopenharmony_ci folio->private = head; 187062306a36Sopenharmony_ci head = folio; 187162306a36Sopenharmony_ci } 187262306a36Sopenharmony_ci 187362306a36Sopenharmony_ci /* traverse in reverse order for best metadata I/O performance */ 187462306a36Sopenharmony_ci while (head) { 187562306a36Sopenharmony_ci folio = head; 187662306a36Sopenharmony_ci head = folio_get_private(folio); 187762306a36Sopenharmony_ci 187862306a36Sopenharmony_ci err = z_erofs_do_read_page(&f, &folio->page); 187962306a36Sopenharmony_ci if (err && err != -EINTR) 188062306a36Sopenharmony_ci erofs_err(inode->i_sb, "readahead error at folio %lu @ nid %llu", 188162306a36Sopenharmony_ci folio->index, EROFS_I(inode)->nid); 188262306a36Sopenharmony_ci } 188362306a36Sopenharmony_ci z_erofs_pcluster_readmore(&f, rac, false); 188462306a36Sopenharmony_ci z_erofs_pcluster_end(&f); 188562306a36Sopenharmony_ci 188662306a36Sopenharmony_ci z_erofs_runqueue(&f, z_erofs_is_sync_decompress(sbi, nr_folios), true); 188762306a36Sopenharmony_ci erofs_put_metabuf(&f.map.buf); 188862306a36Sopenharmony_ci erofs_release_pages(&f.pagepool); 188962306a36Sopenharmony_ci} 189062306a36Sopenharmony_ci 189162306a36Sopenharmony_ciconst struct address_space_operations z_erofs_aops = { 189262306a36Sopenharmony_ci .read_folio = z_erofs_read_folio, 189362306a36Sopenharmony_ci .readahead = z_erofs_readahead, 189462306a36Sopenharmony_ci}; 1895