// SPDX-License-Identifier: GPL-2.0-only
/*
 * Copyright (C) 2018 HUAWEI, Inc.
 *             https://www.huawei.com/
 * Created by Gao Xiang <gaoxiang25@huawei.com>
 */
#include "internal.h"
#include <linux/pagevec.h>

struct page *erofs_allocpage(struct list_head *pool, gfp_t gfp)
{
	struct page *page;

	if (!list_empty(pool)) {
		page = lru_to_page(pool);
		DBG_BUGON(page_ref_count(page) != 1);
		list_del(&page->lru);
	} else {
		page = alloc_page(gfp);
	}
	return page;
}

#if (EROFS_PCPUBUF_NR_PAGES > 0)
static struct {
	u8 data[PAGE_SIZE * EROFS_PCPUBUF_NR_PAGES];
} ____cacheline_aligned_in_smp erofs_pcpubuf[NR_CPUS];

void *erofs_get_pcpubuf(unsigned int pagenr)
{
	preempt_disable();
	return &erofs_pcpubuf[smp_processor_id()].data[pagenr * PAGE_SIZE];
}
#endif

#ifdef CONFIG_EROFS_FS_ZIP
/* global shrink count (for all mounted EROFS instances) */
static atomic_long_t erofs_global_shrink_cnt;

static int erofs_workgroup_get(struct erofs_workgroup *grp)
{
	int o;

repeat:
	o = erofs_wait_on_workgroup_freezed(grp);
	if (o <= 0)
		return -1;

	if (atomic_cmpxchg(&grp->refcount, o, o + 1) != o)
		goto repeat;

	/* decrease global shrink count (paired with erofs_workgroup_put) */
	if (o == 1)
		atomic_long_dec(&erofs_global_shrink_cnt);
	return 0;
}

struct erofs_workgroup *erofs_find_workgroup(struct super_block *sb,
					     pgoff_t index)
{
	struct erofs_sb_info *sbi = EROFS_SB(sb);
	struct erofs_workgroup *grp;

repeat:
	rcu_read_lock();
	grp = xa_load(&sbi->managed_pslots, index);
	if (grp) {
		if (erofs_workgroup_get(grp)) {
			/* prefer to relax rcu read side */
			rcu_read_unlock();
			goto repeat;
		}

		DBG_BUGON(index != grp->index);
	}
	rcu_read_unlock();
	return grp;
}

struct erofs_workgroup *erofs_insert_workgroup(struct super_block *sb,
					       struct erofs_workgroup *grp)
{
	struct erofs_sb_info *const sbi = EROFS_SB(sb);
	struct erofs_workgroup *pre;

	/*
	 * Bump up the reference count before making this workgroup
	 * visible to others in the XArray, in order to avoid a potential
	 * UAF without being serialized by xa_lock.
	 */
	atomic_inc(&grp->refcount);

repeat:
	xa_lock(&sbi->managed_pslots);
	pre = __xa_cmpxchg(&sbi->managed_pslots, grp->index,
			   NULL, grp, GFP_NOFS);
	if (pre) {
		if (xa_is_err(pre)) {
			pre = ERR_PTR(xa_err(pre));
		} else if (erofs_workgroup_get(pre)) {
			/* try to legitimize the current in-tree one */
			xa_unlock(&sbi->managed_pslots);
			cond_resched();
			goto repeat;
		}
		atomic_dec(&grp->refcount);
		grp = pre;
	}
	xa_unlock(&sbi->managed_pslots);
	return grp;
}

static void __erofs_workgroup_free(struct erofs_workgroup *grp)
{
	atomic_long_dec(&erofs_global_shrink_cnt);
	erofs_workgroup_free_rcu(grp);
}

int erofs_workgroup_put(struct erofs_workgroup *grp)
{
	int count = atomic_dec_return(&grp->refcount);

	if (count == 1)
		atomic_long_inc(&erofs_global_shrink_cnt);
	else if (!count)
		__erofs_workgroup_free(grp);
	return count;
}

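/*
 * Reference-count convention, summarized from the call sites in this file
 * (the freeze helpers themselves come from internal.h): a refcount greater
 * than 1 means the workgroup is actively in use; a refcount of exactly 1
 * means only the managed_pslots slot holds it, so it is accounted in
 * erofs_global_shrink_cnt and becomes a reclaim candidate; a frozen
 * workgroup is held exclusively by a reclaimer, so erofs_workgroup_get()
 * bails out on a dying one and erofs_find_workgroup() simply retries.
 */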
static bool erofs_try_to_release_workgroup(struct erofs_sb_info *sbi,
					   struct erofs_workgroup *grp)
{
	/*
	 * If the managed cache is enabled, the refcount of workgroups
	 * themselves can be < 0 (frozen). In other words, there is no
	 * guarantee that all refcounts are positive here.
	 */
	if (!erofs_workgroup_try_to_freeze(grp, 1))
		return false;

	/*
	 * Note that all cached pages should be detached before the
	 * workgroup is deleted from the XArray. Otherwise some cached
	 * pages could still be attached to the orphaned old workgroup
	 * when the new one becomes available in the tree.
	 */
	if (erofs_try_to_free_all_cached_pages(sbi, grp)) {
		erofs_workgroup_unfreeze(grp, 1);
		return false;
	}

	/*
	 * It's impossible to fail after the workgroup is frozen; however,
	 * in order to catch unexpected race conditions, add a DBG_BUGON
	 * to observe this in advance.
	 */
	DBG_BUGON(__xa_erase(&sbi->managed_pslots, grp->index) != grp);

	/* last refcount should be connected with its managed pslot. */
	erofs_workgroup_unfreeze(grp, 0);
	__erofs_workgroup_free(grp);
	return true;
}

static unsigned long erofs_shrink_workstation(struct erofs_sb_info *sbi,
					      unsigned long nr_shrink)
{
	struct erofs_workgroup *grp;
	unsigned int freed = 0;
	unsigned long index;

	xa_lock(&sbi->managed_pslots);
	xa_for_each(&sbi->managed_pslots, index, grp) {
		/* try to shrink each valid workgroup */
		if (!erofs_try_to_release_workgroup(sbi, grp))
			continue;
		xa_unlock(&sbi->managed_pslots);

		++freed;
		if (!--nr_shrink)
			return freed;
		xa_lock(&sbi->managed_pslots);
	}
	xa_unlock(&sbi->managed_pslots);
	return freed;
}

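/*
 * A single shrinker instance is shared by all mounted EROFS filesystems:
 * each erofs_sb_info is kept on erofs_sb_list, and erofs_shrink_scan()
 * below walks that list round-robin, tagging each superblock with the
 * current shrinker_run_no and moving it to the list tail so that one
 * scan pass visits every mount at most once.
 */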
/* protected by 'erofs_sb_list_lock' */
static unsigned int shrinker_run_no;

/* protects the mounted 'erofs_sb_list' */
static DEFINE_SPINLOCK(erofs_sb_list_lock);
static LIST_HEAD(erofs_sb_list);

void erofs_shrinker_register(struct super_block *sb)
{
	struct erofs_sb_info *sbi = EROFS_SB(sb);

	mutex_init(&sbi->umount_mutex);

	spin_lock(&erofs_sb_list_lock);
	list_add(&sbi->list, &erofs_sb_list);
	spin_unlock(&erofs_sb_list_lock);
}

void erofs_shrinker_unregister(struct super_block *sb)
{
	struct erofs_sb_info *const sbi = EROFS_SB(sb);

	mutex_lock(&sbi->umount_mutex);
	/* clean up all remaining workgroups in memory */
	erofs_shrink_workstation(sbi, ~0UL);

	spin_lock(&erofs_sb_list_lock);
	list_del(&sbi->list);
	spin_unlock(&erofs_sb_list_lock);
	mutex_unlock(&sbi->umount_mutex);
}

static unsigned long erofs_shrink_count(struct shrinker *shrink,
					struct shrink_control *sc)
{
	return atomic_long_read(&erofs_global_shrink_cnt);
}

static unsigned long erofs_shrink_scan(struct shrinker *shrink,
				       struct shrink_control *sc)
{
	struct erofs_sb_info *sbi;
	struct list_head *p;

	unsigned long nr = sc->nr_to_scan;
	unsigned int run_no;
	unsigned long freed = 0;

	spin_lock(&erofs_sb_list_lock);
	do {
		run_no = ++shrinker_run_no;
	} while (run_no == 0);

	/* Iterate over all mounted superblocks and try to shrink them */
	p = erofs_sb_list.next;
	while (p != &erofs_sb_list) {
		sbi = list_entry(p, struct erofs_sb_info, list);

		/*
		 * We move the ones we do to the end of the list, so we stop
		 * when we see one we have already done.
		 */
		if (sbi->shrinker_run_no == run_no)
			break;

		if (!mutex_trylock(&sbi->umount_mutex)) {
			p = p->next;
			continue;
		}

		spin_unlock(&erofs_sb_list_lock);
		sbi->shrinker_run_no = run_no;

		freed += erofs_shrink_workstation(sbi, nr - freed);

		spin_lock(&erofs_sb_list_lock);
		/* Get the next list element before we move this one */
		p = p->next;

		/*
		 * Move this one to the end of the list to provide some
		 * fairness.
		 */
		list_move_tail(&sbi->list, &erofs_sb_list);
		mutex_unlock(&sbi->umount_mutex);

		if (freed >= nr)
			break;
	}
	spin_unlock(&erofs_sb_list_lock);
	return freed;
}

static struct shrinker erofs_shrinker_info = {
	.scan_objects = erofs_shrink_scan,
	.count_objects = erofs_shrink_count,
	.seeks = DEFAULT_SEEKS,
};

int __init erofs_init_shrinker(void)
{
	return register_shrinker(&erofs_shrinker_info);
}

void erofs_exit_shrinker(void)
{
	unregister_shrinker(&erofs_shrinker_info);
}
#endif	/* CONFIG_EROFS_FS_ZIP */