18c2ecf20Sopenharmony_ci// SPDX-License-Identifier: GPL-2.0-or-later 28c2ecf20Sopenharmony_ci/* -*- mode: c; c-basic-offset: 8; -*- 38c2ecf20Sopenharmony_ci * vim: noexpandtab sw=8 ts=8 sts=0: 48c2ecf20Sopenharmony_ci * 58c2ecf20Sopenharmony_ci * uptodate.c 68c2ecf20Sopenharmony_ci * 78c2ecf20Sopenharmony_ci * Tracking the up-to-date-ness of a local buffer_head with respect to 88c2ecf20Sopenharmony_ci * the cluster. 98c2ecf20Sopenharmony_ci * 108c2ecf20Sopenharmony_ci * Copyright (C) 2002, 2004, 2005 Oracle. All rights reserved. 118c2ecf20Sopenharmony_ci * 128c2ecf20Sopenharmony_ci * Standard buffer head caching flags (uptodate, etc) are insufficient 138c2ecf20Sopenharmony_ci * in a clustered environment - a buffer may be marked up to date on 148c2ecf20Sopenharmony_ci * our local node but could have been modified by another cluster 158c2ecf20Sopenharmony_ci * member. As a result an additional (and performant) caching scheme 168c2ecf20Sopenharmony_ci * is required. A further requirement is that we consume as little 178c2ecf20Sopenharmony_ci * memory as possible - we never pin buffer_head structures in order 188c2ecf20Sopenharmony_ci * to cache them. 198c2ecf20Sopenharmony_ci * 208c2ecf20Sopenharmony_ci * We track the existence of up to date buffers on the inodes which 218c2ecf20Sopenharmony_ci * are associated with them. Because we don't want to pin 228c2ecf20Sopenharmony_ci * buffer_heads, this is only a (strong) hint and several other checks 238c2ecf20Sopenharmony_ci * are made in the I/O path to ensure that we don't use a stale or 248c2ecf20Sopenharmony_ci * invalid buffer without going to disk: 258c2ecf20Sopenharmony_ci * - buffer_jbd is used liberally - if a bh is in the journal on 268c2ecf20Sopenharmony_ci * this node then it *must* be up to date. 278c2ecf20Sopenharmony_ci * - the standard buffer_uptodate() macro is used to detect buffers 288c2ecf20Sopenharmony_ci * which may be invalid (even if we have an up to date tracking 298c2ecf20Sopenharmony_ci * item for them) 308c2ecf20Sopenharmony_ci * 318c2ecf20Sopenharmony_ci * For a full understanding of how this code works together, one 328c2ecf20Sopenharmony_ci * should read the callers in dlmglue.c, the I/O functions in 338c2ecf20Sopenharmony_ci * buffer_head_io.c and ocfs2_journal_access in journal.c 348c2ecf20Sopenharmony_ci */ 358c2ecf20Sopenharmony_ci 368c2ecf20Sopenharmony_ci#include <linux/fs.h> 378c2ecf20Sopenharmony_ci#include <linux/types.h> 388c2ecf20Sopenharmony_ci#include <linux/slab.h> 398c2ecf20Sopenharmony_ci#include <linux/highmem.h> 408c2ecf20Sopenharmony_ci#include <linux/buffer_head.h> 418c2ecf20Sopenharmony_ci#include <linux/rbtree.h> 428c2ecf20Sopenharmony_ci 438c2ecf20Sopenharmony_ci#include <cluster/masklog.h> 448c2ecf20Sopenharmony_ci 458c2ecf20Sopenharmony_ci#include "ocfs2.h" 468c2ecf20Sopenharmony_ci 478c2ecf20Sopenharmony_ci#include "inode.h" 488c2ecf20Sopenharmony_ci#include "uptodate.h" 498c2ecf20Sopenharmony_ci#include "ocfs2_trace.h" 508c2ecf20Sopenharmony_ci 518c2ecf20Sopenharmony_cistruct ocfs2_meta_cache_item { 528c2ecf20Sopenharmony_ci struct rb_node c_node; 538c2ecf20Sopenharmony_ci sector_t c_block; 548c2ecf20Sopenharmony_ci}; 558c2ecf20Sopenharmony_ci 568c2ecf20Sopenharmony_cistatic struct kmem_cache *ocfs2_uptodate_cachep; 578c2ecf20Sopenharmony_ci 588c2ecf20Sopenharmony_ciu64 ocfs2_metadata_cache_owner(struct ocfs2_caching_info *ci) 598c2ecf20Sopenharmony_ci{ 608c2ecf20Sopenharmony_ci BUG_ON(!ci || !ci->ci_ops); 618c2ecf20Sopenharmony_ci 628c2ecf20Sopenharmony_ci return ci->ci_ops->co_owner(ci); 638c2ecf20Sopenharmony_ci} 648c2ecf20Sopenharmony_ci 658c2ecf20Sopenharmony_cistruct super_block *ocfs2_metadata_cache_get_super(struct ocfs2_caching_info *ci) 668c2ecf20Sopenharmony_ci{ 678c2ecf20Sopenharmony_ci BUG_ON(!ci || !ci->ci_ops); 688c2ecf20Sopenharmony_ci 698c2ecf20Sopenharmony_ci return ci->ci_ops->co_get_super(ci); 708c2ecf20Sopenharmony_ci} 718c2ecf20Sopenharmony_ci 728c2ecf20Sopenharmony_cistatic void ocfs2_metadata_cache_lock(struct ocfs2_caching_info *ci) 738c2ecf20Sopenharmony_ci{ 748c2ecf20Sopenharmony_ci BUG_ON(!ci || !ci->ci_ops); 758c2ecf20Sopenharmony_ci 768c2ecf20Sopenharmony_ci ci->ci_ops->co_cache_lock(ci); 778c2ecf20Sopenharmony_ci} 788c2ecf20Sopenharmony_ci 798c2ecf20Sopenharmony_cistatic void ocfs2_metadata_cache_unlock(struct ocfs2_caching_info *ci) 808c2ecf20Sopenharmony_ci{ 818c2ecf20Sopenharmony_ci BUG_ON(!ci || !ci->ci_ops); 828c2ecf20Sopenharmony_ci 838c2ecf20Sopenharmony_ci ci->ci_ops->co_cache_unlock(ci); 848c2ecf20Sopenharmony_ci} 858c2ecf20Sopenharmony_ci 868c2ecf20Sopenharmony_civoid ocfs2_metadata_cache_io_lock(struct ocfs2_caching_info *ci) 878c2ecf20Sopenharmony_ci{ 888c2ecf20Sopenharmony_ci BUG_ON(!ci || !ci->ci_ops); 898c2ecf20Sopenharmony_ci 908c2ecf20Sopenharmony_ci ci->ci_ops->co_io_lock(ci); 918c2ecf20Sopenharmony_ci} 928c2ecf20Sopenharmony_ci 938c2ecf20Sopenharmony_civoid ocfs2_metadata_cache_io_unlock(struct ocfs2_caching_info *ci) 948c2ecf20Sopenharmony_ci{ 958c2ecf20Sopenharmony_ci BUG_ON(!ci || !ci->ci_ops); 968c2ecf20Sopenharmony_ci 978c2ecf20Sopenharmony_ci ci->ci_ops->co_io_unlock(ci); 988c2ecf20Sopenharmony_ci} 998c2ecf20Sopenharmony_ci 1008c2ecf20Sopenharmony_ci 1018c2ecf20Sopenharmony_cistatic void ocfs2_metadata_cache_reset(struct ocfs2_caching_info *ci, 1028c2ecf20Sopenharmony_ci int clear) 1038c2ecf20Sopenharmony_ci{ 1048c2ecf20Sopenharmony_ci ci->ci_flags |= OCFS2_CACHE_FL_INLINE; 1058c2ecf20Sopenharmony_ci ci->ci_num_cached = 0; 1068c2ecf20Sopenharmony_ci 1078c2ecf20Sopenharmony_ci if (clear) { 1088c2ecf20Sopenharmony_ci ci->ci_created_trans = 0; 1098c2ecf20Sopenharmony_ci ci->ci_last_trans = 0; 1108c2ecf20Sopenharmony_ci } 1118c2ecf20Sopenharmony_ci} 1128c2ecf20Sopenharmony_ci 1138c2ecf20Sopenharmony_civoid ocfs2_metadata_cache_init(struct ocfs2_caching_info *ci, 1148c2ecf20Sopenharmony_ci const struct ocfs2_caching_operations *ops) 1158c2ecf20Sopenharmony_ci{ 1168c2ecf20Sopenharmony_ci BUG_ON(!ops); 1178c2ecf20Sopenharmony_ci 1188c2ecf20Sopenharmony_ci ci->ci_ops = ops; 1198c2ecf20Sopenharmony_ci ocfs2_metadata_cache_reset(ci, 1); 1208c2ecf20Sopenharmony_ci} 1218c2ecf20Sopenharmony_ci 1228c2ecf20Sopenharmony_civoid ocfs2_metadata_cache_exit(struct ocfs2_caching_info *ci) 1238c2ecf20Sopenharmony_ci{ 1248c2ecf20Sopenharmony_ci ocfs2_metadata_cache_purge(ci); 1258c2ecf20Sopenharmony_ci ocfs2_metadata_cache_reset(ci, 1); 1268c2ecf20Sopenharmony_ci} 1278c2ecf20Sopenharmony_ci 1288c2ecf20Sopenharmony_ci 1298c2ecf20Sopenharmony_ci/* No lock taken here as 'root' is not expected to be visible to other 1308c2ecf20Sopenharmony_ci * processes. */ 1318c2ecf20Sopenharmony_cistatic unsigned int ocfs2_purge_copied_metadata_tree(struct rb_root *root) 1328c2ecf20Sopenharmony_ci{ 1338c2ecf20Sopenharmony_ci unsigned int purged = 0; 1348c2ecf20Sopenharmony_ci struct rb_node *node; 1358c2ecf20Sopenharmony_ci struct ocfs2_meta_cache_item *item; 1368c2ecf20Sopenharmony_ci 1378c2ecf20Sopenharmony_ci while ((node = rb_last(root)) != NULL) { 1388c2ecf20Sopenharmony_ci item = rb_entry(node, struct ocfs2_meta_cache_item, c_node); 1398c2ecf20Sopenharmony_ci 1408c2ecf20Sopenharmony_ci trace_ocfs2_purge_copied_metadata_tree( 1418c2ecf20Sopenharmony_ci (unsigned long long) item->c_block); 1428c2ecf20Sopenharmony_ci 1438c2ecf20Sopenharmony_ci rb_erase(&item->c_node, root); 1448c2ecf20Sopenharmony_ci kmem_cache_free(ocfs2_uptodate_cachep, item); 1458c2ecf20Sopenharmony_ci 1468c2ecf20Sopenharmony_ci purged++; 1478c2ecf20Sopenharmony_ci } 1488c2ecf20Sopenharmony_ci return purged; 1498c2ecf20Sopenharmony_ci} 1508c2ecf20Sopenharmony_ci 1518c2ecf20Sopenharmony_ci/* Called from locking and called from ocfs2_clear_inode. Dump the 1528c2ecf20Sopenharmony_ci * cache for a given inode. 1538c2ecf20Sopenharmony_ci * 1548c2ecf20Sopenharmony_ci * This function is a few more lines longer than necessary due to some 1558c2ecf20Sopenharmony_ci * accounting done here, but I think it's worth tracking down those 1568c2ecf20Sopenharmony_ci * bugs sooner -- Mark */ 1578c2ecf20Sopenharmony_civoid ocfs2_metadata_cache_purge(struct ocfs2_caching_info *ci) 1588c2ecf20Sopenharmony_ci{ 1598c2ecf20Sopenharmony_ci unsigned int tree, to_purge, purged; 1608c2ecf20Sopenharmony_ci struct rb_root root = RB_ROOT; 1618c2ecf20Sopenharmony_ci 1628c2ecf20Sopenharmony_ci BUG_ON(!ci || !ci->ci_ops); 1638c2ecf20Sopenharmony_ci 1648c2ecf20Sopenharmony_ci ocfs2_metadata_cache_lock(ci); 1658c2ecf20Sopenharmony_ci tree = !(ci->ci_flags & OCFS2_CACHE_FL_INLINE); 1668c2ecf20Sopenharmony_ci to_purge = ci->ci_num_cached; 1678c2ecf20Sopenharmony_ci 1688c2ecf20Sopenharmony_ci trace_ocfs2_metadata_cache_purge( 1698c2ecf20Sopenharmony_ci (unsigned long long)ocfs2_metadata_cache_owner(ci), 1708c2ecf20Sopenharmony_ci to_purge, tree); 1718c2ecf20Sopenharmony_ci 1728c2ecf20Sopenharmony_ci /* If we're a tree, save off the root so that we can safely 1738c2ecf20Sopenharmony_ci * initialize the cache. We do the work to free tree members 1748c2ecf20Sopenharmony_ci * without the spinlock. */ 1758c2ecf20Sopenharmony_ci if (tree) 1768c2ecf20Sopenharmony_ci root = ci->ci_cache.ci_tree; 1778c2ecf20Sopenharmony_ci 1788c2ecf20Sopenharmony_ci ocfs2_metadata_cache_reset(ci, 0); 1798c2ecf20Sopenharmony_ci ocfs2_metadata_cache_unlock(ci); 1808c2ecf20Sopenharmony_ci 1818c2ecf20Sopenharmony_ci purged = ocfs2_purge_copied_metadata_tree(&root); 1828c2ecf20Sopenharmony_ci /* If possible, track the number wiped so that we can more 1838c2ecf20Sopenharmony_ci * easily detect counting errors. Unfortunately, this is only 1848c2ecf20Sopenharmony_ci * meaningful for trees. */ 1858c2ecf20Sopenharmony_ci if (tree && purged != to_purge) 1868c2ecf20Sopenharmony_ci mlog(ML_ERROR, "Owner %llu, count = %u, purged = %u\n", 1878c2ecf20Sopenharmony_ci (unsigned long long)ocfs2_metadata_cache_owner(ci), 1888c2ecf20Sopenharmony_ci to_purge, purged); 1898c2ecf20Sopenharmony_ci} 1908c2ecf20Sopenharmony_ci 1918c2ecf20Sopenharmony_ci/* Returns the index in the cache array, -1 if not found. 1928c2ecf20Sopenharmony_ci * Requires ip_lock. */ 1938c2ecf20Sopenharmony_cistatic int ocfs2_search_cache_array(struct ocfs2_caching_info *ci, 1948c2ecf20Sopenharmony_ci sector_t item) 1958c2ecf20Sopenharmony_ci{ 1968c2ecf20Sopenharmony_ci int i; 1978c2ecf20Sopenharmony_ci 1988c2ecf20Sopenharmony_ci for (i = 0; i < ci->ci_num_cached; i++) { 1998c2ecf20Sopenharmony_ci if (item == ci->ci_cache.ci_array[i]) 2008c2ecf20Sopenharmony_ci return i; 2018c2ecf20Sopenharmony_ci } 2028c2ecf20Sopenharmony_ci 2038c2ecf20Sopenharmony_ci return -1; 2048c2ecf20Sopenharmony_ci} 2058c2ecf20Sopenharmony_ci 2068c2ecf20Sopenharmony_ci/* Returns the cache item if found, otherwise NULL. 2078c2ecf20Sopenharmony_ci * Requires ip_lock. */ 2088c2ecf20Sopenharmony_cistatic struct ocfs2_meta_cache_item * 2098c2ecf20Sopenharmony_ciocfs2_search_cache_tree(struct ocfs2_caching_info *ci, 2108c2ecf20Sopenharmony_ci sector_t block) 2118c2ecf20Sopenharmony_ci{ 2128c2ecf20Sopenharmony_ci struct rb_node * n = ci->ci_cache.ci_tree.rb_node; 2138c2ecf20Sopenharmony_ci struct ocfs2_meta_cache_item *item = NULL; 2148c2ecf20Sopenharmony_ci 2158c2ecf20Sopenharmony_ci while (n) { 2168c2ecf20Sopenharmony_ci item = rb_entry(n, struct ocfs2_meta_cache_item, c_node); 2178c2ecf20Sopenharmony_ci 2188c2ecf20Sopenharmony_ci if (block < item->c_block) 2198c2ecf20Sopenharmony_ci n = n->rb_left; 2208c2ecf20Sopenharmony_ci else if (block > item->c_block) 2218c2ecf20Sopenharmony_ci n = n->rb_right; 2228c2ecf20Sopenharmony_ci else 2238c2ecf20Sopenharmony_ci return item; 2248c2ecf20Sopenharmony_ci } 2258c2ecf20Sopenharmony_ci 2268c2ecf20Sopenharmony_ci return NULL; 2278c2ecf20Sopenharmony_ci} 2288c2ecf20Sopenharmony_ci 2298c2ecf20Sopenharmony_cistatic int ocfs2_buffer_cached(struct ocfs2_caching_info *ci, 2308c2ecf20Sopenharmony_ci struct buffer_head *bh) 2318c2ecf20Sopenharmony_ci{ 2328c2ecf20Sopenharmony_ci int index = -1; 2338c2ecf20Sopenharmony_ci struct ocfs2_meta_cache_item *item = NULL; 2348c2ecf20Sopenharmony_ci 2358c2ecf20Sopenharmony_ci ocfs2_metadata_cache_lock(ci); 2368c2ecf20Sopenharmony_ci 2378c2ecf20Sopenharmony_ci trace_ocfs2_buffer_cached_begin( 2388c2ecf20Sopenharmony_ci (unsigned long long)ocfs2_metadata_cache_owner(ci), 2398c2ecf20Sopenharmony_ci (unsigned long long) bh->b_blocknr, 2408c2ecf20Sopenharmony_ci !!(ci->ci_flags & OCFS2_CACHE_FL_INLINE)); 2418c2ecf20Sopenharmony_ci 2428c2ecf20Sopenharmony_ci if (ci->ci_flags & OCFS2_CACHE_FL_INLINE) 2438c2ecf20Sopenharmony_ci index = ocfs2_search_cache_array(ci, bh->b_blocknr); 2448c2ecf20Sopenharmony_ci else 2458c2ecf20Sopenharmony_ci item = ocfs2_search_cache_tree(ci, bh->b_blocknr); 2468c2ecf20Sopenharmony_ci 2478c2ecf20Sopenharmony_ci ocfs2_metadata_cache_unlock(ci); 2488c2ecf20Sopenharmony_ci 2498c2ecf20Sopenharmony_ci trace_ocfs2_buffer_cached_end(index, item); 2508c2ecf20Sopenharmony_ci 2518c2ecf20Sopenharmony_ci return (index != -1) || (item != NULL); 2528c2ecf20Sopenharmony_ci} 2538c2ecf20Sopenharmony_ci 2548c2ecf20Sopenharmony_ci/* Warning: even if it returns true, this does *not* guarantee that 2558c2ecf20Sopenharmony_ci * the block is stored in our inode metadata cache. 2568c2ecf20Sopenharmony_ci * 2578c2ecf20Sopenharmony_ci * This can be called under lock_buffer() 2588c2ecf20Sopenharmony_ci */ 2598c2ecf20Sopenharmony_ciint ocfs2_buffer_uptodate(struct ocfs2_caching_info *ci, 2608c2ecf20Sopenharmony_ci struct buffer_head *bh) 2618c2ecf20Sopenharmony_ci{ 2628c2ecf20Sopenharmony_ci /* Doesn't matter if the bh is in our cache or not -- if it's 2638c2ecf20Sopenharmony_ci * not marked uptodate then we know it can't have correct 2648c2ecf20Sopenharmony_ci * data. */ 2658c2ecf20Sopenharmony_ci if (!buffer_uptodate(bh)) 2668c2ecf20Sopenharmony_ci return 0; 2678c2ecf20Sopenharmony_ci 2688c2ecf20Sopenharmony_ci /* OCFS2 does not allow multiple nodes to be changing the same 2698c2ecf20Sopenharmony_ci * block at the same time. */ 2708c2ecf20Sopenharmony_ci if (buffer_jbd(bh)) 2718c2ecf20Sopenharmony_ci return 1; 2728c2ecf20Sopenharmony_ci 2738c2ecf20Sopenharmony_ci /* Ok, locally the buffer is marked as up to date, now search 2748c2ecf20Sopenharmony_ci * our cache to see if we can trust that. */ 2758c2ecf20Sopenharmony_ci return ocfs2_buffer_cached(ci, bh); 2768c2ecf20Sopenharmony_ci} 2778c2ecf20Sopenharmony_ci 2788c2ecf20Sopenharmony_ci/* 2798c2ecf20Sopenharmony_ci * Determine whether a buffer is currently out on a read-ahead request. 2808c2ecf20Sopenharmony_ci * ci_io_sem should be held to serialize submitters with the logic here. 2818c2ecf20Sopenharmony_ci */ 2828c2ecf20Sopenharmony_ciint ocfs2_buffer_read_ahead(struct ocfs2_caching_info *ci, 2838c2ecf20Sopenharmony_ci struct buffer_head *bh) 2848c2ecf20Sopenharmony_ci{ 2858c2ecf20Sopenharmony_ci return buffer_locked(bh) && ocfs2_buffer_cached(ci, bh); 2868c2ecf20Sopenharmony_ci} 2878c2ecf20Sopenharmony_ci 2888c2ecf20Sopenharmony_ci/* Requires ip_lock */ 2898c2ecf20Sopenharmony_cistatic void ocfs2_append_cache_array(struct ocfs2_caching_info *ci, 2908c2ecf20Sopenharmony_ci sector_t block) 2918c2ecf20Sopenharmony_ci{ 2928c2ecf20Sopenharmony_ci BUG_ON(ci->ci_num_cached >= OCFS2_CACHE_INFO_MAX_ARRAY); 2938c2ecf20Sopenharmony_ci 2948c2ecf20Sopenharmony_ci trace_ocfs2_append_cache_array( 2958c2ecf20Sopenharmony_ci (unsigned long long)ocfs2_metadata_cache_owner(ci), 2968c2ecf20Sopenharmony_ci (unsigned long long)block, ci->ci_num_cached); 2978c2ecf20Sopenharmony_ci 2988c2ecf20Sopenharmony_ci ci->ci_cache.ci_array[ci->ci_num_cached] = block; 2998c2ecf20Sopenharmony_ci ci->ci_num_cached++; 3008c2ecf20Sopenharmony_ci} 3018c2ecf20Sopenharmony_ci 3028c2ecf20Sopenharmony_ci/* By now the caller should have checked that the item does *not* 3038c2ecf20Sopenharmony_ci * exist in the tree. 3048c2ecf20Sopenharmony_ci * Requires ip_lock. */ 3058c2ecf20Sopenharmony_cistatic void __ocfs2_insert_cache_tree(struct ocfs2_caching_info *ci, 3068c2ecf20Sopenharmony_ci struct ocfs2_meta_cache_item *new) 3078c2ecf20Sopenharmony_ci{ 3088c2ecf20Sopenharmony_ci sector_t block = new->c_block; 3098c2ecf20Sopenharmony_ci struct rb_node *parent = NULL; 3108c2ecf20Sopenharmony_ci struct rb_node **p = &ci->ci_cache.ci_tree.rb_node; 3118c2ecf20Sopenharmony_ci struct ocfs2_meta_cache_item *tmp; 3128c2ecf20Sopenharmony_ci 3138c2ecf20Sopenharmony_ci trace_ocfs2_insert_cache_tree( 3148c2ecf20Sopenharmony_ci (unsigned long long)ocfs2_metadata_cache_owner(ci), 3158c2ecf20Sopenharmony_ci (unsigned long long)block, ci->ci_num_cached); 3168c2ecf20Sopenharmony_ci 3178c2ecf20Sopenharmony_ci while(*p) { 3188c2ecf20Sopenharmony_ci parent = *p; 3198c2ecf20Sopenharmony_ci 3208c2ecf20Sopenharmony_ci tmp = rb_entry(parent, struct ocfs2_meta_cache_item, c_node); 3218c2ecf20Sopenharmony_ci 3228c2ecf20Sopenharmony_ci if (block < tmp->c_block) 3238c2ecf20Sopenharmony_ci p = &(*p)->rb_left; 3248c2ecf20Sopenharmony_ci else if (block > tmp->c_block) 3258c2ecf20Sopenharmony_ci p = &(*p)->rb_right; 3268c2ecf20Sopenharmony_ci else { 3278c2ecf20Sopenharmony_ci /* This should never happen! */ 3288c2ecf20Sopenharmony_ci mlog(ML_ERROR, "Duplicate block %llu cached!\n", 3298c2ecf20Sopenharmony_ci (unsigned long long) block); 3308c2ecf20Sopenharmony_ci BUG(); 3318c2ecf20Sopenharmony_ci } 3328c2ecf20Sopenharmony_ci } 3338c2ecf20Sopenharmony_ci 3348c2ecf20Sopenharmony_ci rb_link_node(&new->c_node, parent, p); 3358c2ecf20Sopenharmony_ci rb_insert_color(&new->c_node, &ci->ci_cache.ci_tree); 3368c2ecf20Sopenharmony_ci ci->ci_num_cached++; 3378c2ecf20Sopenharmony_ci} 3388c2ecf20Sopenharmony_ci 3398c2ecf20Sopenharmony_ci/* co_cache_lock() must be held */ 3408c2ecf20Sopenharmony_cistatic inline int ocfs2_insert_can_use_array(struct ocfs2_caching_info *ci) 3418c2ecf20Sopenharmony_ci{ 3428c2ecf20Sopenharmony_ci return (ci->ci_flags & OCFS2_CACHE_FL_INLINE) && 3438c2ecf20Sopenharmony_ci (ci->ci_num_cached < OCFS2_CACHE_INFO_MAX_ARRAY); 3448c2ecf20Sopenharmony_ci} 3458c2ecf20Sopenharmony_ci 3468c2ecf20Sopenharmony_ci/* tree should be exactly OCFS2_CACHE_INFO_MAX_ARRAY wide. NULL the 3478c2ecf20Sopenharmony_ci * pointers in tree after we use them - this allows caller to detect 3488c2ecf20Sopenharmony_ci * when to free in case of error. 3498c2ecf20Sopenharmony_ci * 3508c2ecf20Sopenharmony_ci * The co_cache_lock() must be held. */ 3518c2ecf20Sopenharmony_cistatic void ocfs2_expand_cache(struct ocfs2_caching_info *ci, 3528c2ecf20Sopenharmony_ci struct ocfs2_meta_cache_item **tree) 3538c2ecf20Sopenharmony_ci{ 3548c2ecf20Sopenharmony_ci int i; 3558c2ecf20Sopenharmony_ci 3568c2ecf20Sopenharmony_ci mlog_bug_on_msg(ci->ci_num_cached != OCFS2_CACHE_INFO_MAX_ARRAY, 3578c2ecf20Sopenharmony_ci "Owner %llu, num cached = %u, should be %u\n", 3588c2ecf20Sopenharmony_ci (unsigned long long)ocfs2_metadata_cache_owner(ci), 3598c2ecf20Sopenharmony_ci ci->ci_num_cached, OCFS2_CACHE_INFO_MAX_ARRAY); 3608c2ecf20Sopenharmony_ci mlog_bug_on_msg(!(ci->ci_flags & OCFS2_CACHE_FL_INLINE), 3618c2ecf20Sopenharmony_ci "Owner %llu not marked as inline anymore!\n", 3628c2ecf20Sopenharmony_ci (unsigned long long)ocfs2_metadata_cache_owner(ci)); 3638c2ecf20Sopenharmony_ci 3648c2ecf20Sopenharmony_ci /* Be careful to initialize the tree members *first* because 3658c2ecf20Sopenharmony_ci * once the ci_tree is used, the array is junk... */ 3668c2ecf20Sopenharmony_ci for (i = 0; i < OCFS2_CACHE_INFO_MAX_ARRAY; i++) 3678c2ecf20Sopenharmony_ci tree[i]->c_block = ci->ci_cache.ci_array[i]; 3688c2ecf20Sopenharmony_ci 3698c2ecf20Sopenharmony_ci ci->ci_flags &= ~OCFS2_CACHE_FL_INLINE; 3708c2ecf20Sopenharmony_ci ci->ci_cache.ci_tree = RB_ROOT; 3718c2ecf20Sopenharmony_ci /* this will be set again by __ocfs2_insert_cache_tree */ 3728c2ecf20Sopenharmony_ci ci->ci_num_cached = 0; 3738c2ecf20Sopenharmony_ci 3748c2ecf20Sopenharmony_ci for (i = 0; i < OCFS2_CACHE_INFO_MAX_ARRAY; i++) { 3758c2ecf20Sopenharmony_ci __ocfs2_insert_cache_tree(ci, tree[i]); 3768c2ecf20Sopenharmony_ci tree[i] = NULL; 3778c2ecf20Sopenharmony_ci } 3788c2ecf20Sopenharmony_ci 3798c2ecf20Sopenharmony_ci trace_ocfs2_expand_cache( 3808c2ecf20Sopenharmony_ci (unsigned long long)ocfs2_metadata_cache_owner(ci), 3818c2ecf20Sopenharmony_ci ci->ci_flags, ci->ci_num_cached); 3828c2ecf20Sopenharmony_ci} 3838c2ecf20Sopenharmony_ci 3848c2ecf20Sopenharmony_ci/* Slow path function - memory allocation is necessary. See the 3858c2ecf20Sopenharmony_ci * comment above ocfs2_set_buffer_uptodate for more information. */ 3868c2ecf20Sopenharmony_cistatic void __ocfs2_set_buffer_uptodate(struct ocfs2_caching_info *ci, 3878c2ecf20Sopenharmony_ci sector_t block, 3888c2ecf20Sopenharmony_ci int expand_tree) 3898c2ecf20Sopenharmony_ci{ 3908c2ecf20Sopenharmony_ci int i; 3918c2ecf20Sopenharmony_ci struct ocfs2_meta_cache_item *new = NULL; 3928c2ecf20Sopenharmony_ci struct ocfs2_meta_cache_item *tree[OCFS2_CACHE_INFO_MAX_ARRAY] = 3938c2ecf20Sopenharmony_ci { NULL, }; 3948c2ecf20Sopenharmony_ci 3958c2ecf20Sopenharmony_ci trace_ocfs2_set_buffer_uptodate( 3968c2ecf20Sopenharmony_ci (unsigned long long)ocfs2_metadata_cache_owner(ci), 3978c2ecf20Sopenharmony_ci (unsigned long long)block, expand_tree); 3988c2ecf20Sopenharmony_ci 3998c2ecf20Sopenharmony_ci new = kmem_cache_alloc(ocfs2_uptodate_cachep, GFP_NOFS); 4008c2ecf20Sopenharmony_ci if (!new) { 4018c2ecf20Sopenharmony_ci mlog_errno(-ENOMEM); 4028c2ecf20Sopenharmony_ci return; 4038c2ecf20Sopenharmony_ci } 4048c2ecf20Sopenharmony_ci new->c_block = block; 4058c2ecf20Sopenharmony_ci 4068c2ecf20Sopenharmony_ci if (expand_tree) { 4078c2ecf20Sopenharmony_ci /* Do *not* allocate an array here - the removal code 4088c2ecf20Sopenharmony_ci * has no way of tracking that. */ 4098c2ecf20Sopenharmony_ci for (i = 0; i < OCFS2_CACHE_INFO_MAX_ARRAY; i++) { 4108c2ecf20Sopenharmony_ci tree[i] = kmem_cache_alloc(ocfs2_uptodate_cachep, 4118c2ecf20Sopenharmony_ci GFP_NOFS); 4128c2ecf20Sopenharmony_ci if (!tree[i]) { 4138c2ecf20Sopenharmony_ci mlog_errno(-ENOMEM); 4148c2ecf20Sopenharmony_ci goto out_free; 4158c2ecf20Sopenharmony_ci } 4168c2ecf20Sopenharmony_ci 4178c2ecf20Sopenharmony_ci /* These are initialized in ocfs2_expand_cache! */ 4188c2ecf20Sopenharmony_ci } 4198c2ecf20Sopenharmony_ci } 4208c2ecf20Sopenharmony_ci 4218c2ecf20Sopenharmony_ci ocfs2_metadata_cache_lock(ci); 4228c2ecf20Sopenharmony_ci if (ocfs2_insert_can_use_array(ci)) { 4238c2ecf20Sopenharmony_ci /* Ok, items were removed from the cache in between 4248c2ecf20Sopenharmony_ci * locks. Detect this and revert back to the fast path */ 4258c2ecf20Sopenharmony_ci ocfs2_append_cache_array(ci, block); 4268c2ecf20Sopenharmony_ci ocfs2_metadata_cache_unlock(ci); 4278c2ecf20Sopenharmony_ci goto out_free; 4288c2ecf20Sopenharmony_ci } 4298c2ecf20Sopenharmony_ci 4308c2ecf20Sopenharmony_ci if (expand_tree) 4318c2ecf20Sopenharmony_ci ocfs2_expand_cache(ci, tree); 4328c2ecf20Sopenharmony_ci 4338c2ecf20Sopenharmony_ci __ocfs2_insert_cache_tree(ci, new); 4348c2ecf20Sopenharmony_ci ocfs2_metadata_cache_unlock(ci); 4358c2ecf20Sopenharmony_ci 4368c2ecf20Sopenharmony_ci new = NULL; 4378c2ecf20Sopenharmony_ciout_free: 4388c2ecf20Sopenharmony_ci if (new) 4398c2ecf20Sopenharmony_ci kmem_cache_free(ocfs2_uptodate_cachep, new); 4408c2ecf20Sopenharmony_ci 4418c2ecf20Sopenharmony_ci /* If these were used, then ocfs2_expand_cache re-set them to 4428c2ecf20Sopenharmony_ci * NULL for us. */ 4438c2ecf20Sopenharmony_ci if (tree[0]) { 4448c2ecf20Sopenharmony_ci for (i = 0; i < OCFS2_CACHE_INFO_MAX_ARRAY; i++) 4458c2ecf20Sopenharmony_ci if (tree[i]) 4468c2ecf20Sopenharmony_ci kmem_cache_free(ocfs2_uptodate_cachep, 4478c2ecf20Sopenharmony_ci tree[i]); 4488c2ecf20Sopenharmony_ci } 4498c2ecf20Sopenharmony_ci} 4508c2ecf20Sopenharmony_ci 4518c2ecf20Sopenharmony_ci/* Item insertion is guarded by co_io_lock(), so the insertion path takes 4528c2ecf20Sopenharmony_ci * advantage of this by not rechecking for a duplicate insert during 4538c2ecf20Sopenharmony_ci * the slow case. Additionally, if the cache needs to be bumped up to 4548c2ecf20Sopenharmony_ci * a tree, the code will not recheck after acquiring the lock -- 4558c2ecf20Sopenharmony_ci * multiple paths cannot be expanding to a tree at the same time. 4568c2ecf20Sopenharmony_ci * 4578c2ecf20Sopenharmony_ci * The slow path takes into account that items can be removed 4588c2ecf20Sopenharmony_ci * (including the whole tree wiped and reset) when this process it out 4598c2ecf20Sopenharmony_ci * allocating memory. In those cases, it reverts back to the fast 4608c2ecf20Sopenharmony_ci * path. 4618c2ecf20Sopenharmony_ci * 4628c2ecf20Sopenharmony_ci * Note that this function may actually fail to insert the block if 4638c2ecf20Sopenharmony_ci * memory cannot be allocated. This is not fatal however (but may 4648c2ecf20Sopenharmony_ci * result in a performance penalty) 4658c2ecf20Sopenharmony_ci * 4668c2ecf20Sopenharmony_ci * Readahead buffers can be passed in here before the I/O request is 4678c2ecf20Sopenharmony_ci * completed. 4688c2ecf20Sopenharmony_ci */ 4698c2ecf20Sopenharmony_civoid ocfs2_set_buffer_uptodate(struct ocfs2_caching_info *ci, 4708c2ecf20Sopenharmony_ci struct buffer_head *bh) 4718c2ecf20Sopenharmony_ci{ 4728c2ecf20Sopenharmony_ci int expand; 4738c2ecf20Sopenharmony_ci 4748c2ecf20Sopenharmony_ci /* The block may very well exist in our cache already, so avoid 4758c2ecf20Sopenharmony_ci * doing any more work in that case. */ 4768c2ecf20Sopenharmony_ci if (ocfs2_buffer_cached(ci, bh)) 4778c2ecf20Sopenharmony_ci return; 4788c2ecf20Sopenharmony_ci 4798c2ecf20Sopenharmony_ci trace_ocfs2_set_buffer_uptodate_begin( 4808c2ecf20Sopenharmony_ci (unsigned long long)ocfs2_metadata_cache_owner(ci), 4818c2ecf20Sopenharmony_ci (unsigned long long)bh->b_blocknr); 4828c2ecf20Sopenharmony_ci 4838c2ecf20Sopenharmony_ci /* No need to recheck under spinlock - insertion is guarded by 4848c2ecf20Sopenharmony_ci * co_io_lock() */ 4858c2ecf20Sopenharmony_ci ocfs2_metadata_cache_lock(ci); 4868c2ecf20Sopenharmony_ci if (ocfs2_insert_can_use_array(ci)) { 4878c2ecf20Sopenharmony_ci /* Fast case - it's an array and there's a free 4888c2ecf20Sopenharmony_ci * spot. */ 4898c2ecf20Sopenharmony_ci ocfs2_append_cache_array(ci, bh->b_blocknr); 4908c2ecf20Sopenharmony_ci ocfs2_metadata_cache_unlock(ci); 4918c2ecf20Sopenharmony_ci return; 4928c2ecf20Sopenharmony_ci } 4938c2ecf20Sopenharmony_ci 4948c2ecf20Sopenharmony_ci expand = 0; 4958c2ecf20Sopenharmony_ci if (ci->ci_flags & OCFS2_CACHE_FL_INLINE) { 4968c2ecf20Sopenharmony_ci /* We need to bump things up to a tree. */ 4978c2ecf20Sopenharmony_ci expand = 1; 4988c2ecf20Sopenharmony_ci } 4998c2ecf20Sopenharmony_ci ocfs2_metadata_cache_unlock(ci); 5008c2ecf20Sopenharmony_ci 5018c2ecf20Sopenharmony_ci __ocfs2_set_buffer_uptodate(ci, bh->b_blocknr, expand); 5028c2ecf20Sopenharmony_ci} 5038c2ecf20Sopenharmony_ci 5048c2ecf20Sopenharmony_ci/* Called against a newly allocated buffer. Most likely nobody should 5058c2ecf20Sopenharmony_ci * be able to read this sort of metadata while it's still being 5068c2ecf20Sopenharmony_ci * allocated, but this is careful to take co_io_lock() anyway. */ 5078c2ecf20Sopenharmony_civoid ocfs2_set_new_buffer_uptodate(struct ocfs2_caching_info *ci, 5088c2ecf20Sopenharmony_ci struct buffer_head *bh) 5098c2ecf20Sopenharmony_ci{ 5108c2ecf20Sopenharmony_ci /* This should definitely *not* exist in our cache */ 5118c2ecf20Sopenharmony_ci BUG_ON(ocfs2_buffer_cached(ci, bh)); 5128c2ecf20Sopenharmony_ci 5138c2ecf20Sopenharmony_ci set_buffer_uptodate(bh); 5148c2ecf20Sopenharmony_ci 5158c2ecf20Sopenharmony_ci ocfs2_metadata_cache_io_lock(ci); 5168c2ecf20Sopenharmony_ci ocfs2_set_buffer_uptodate(ci, bh); 5178c2ecf20Sopenharmony_ci ocfs2_metadata_cache_io_unlock(ci); 5188c2ecf20Sopenharmony_ci} 5198c2ecf20Sopenharmony_ci 5208c2ecf20Sopenharmony_ci/* Requires ip_lock. */ 5218c2ecf20Sopenharmony_cistatic void ocfs2_remove_metadata_array(struct ocfs2_caching_info *ci, 5228c2ecf20Sopenharmony_ci int index) 5238c2ecf20Sopenharmony_ci{ 5248c2ecf20Sopenharmony_ci sector_t *array = ci->ci_cache.ci_array; 5258c2ecf20Sopenharmony_ci int bytes; 5268c2ecf20Sopenharmony_ci 5278c2ecf20Sopenharmony_ci BUG_ON(index < 0 || index >= OCFS2_CACHE_INFO_MAX_ARRAY); 5288c2ecf20Sopenharmony_ci BUG_ON(index >= ci->ci_num_cached); 5298c2ecf20Sopenharmony_ci BUG_ON(!ci->ci_num_cached); 5308c2ecf20Sopenharmony_ci 5318c2ecf20Sopenharmony_ci trace_ocfs2_remove_metadata_array( 5328c2ecf20Sopenharmony_ci (unsigned long long)ocfs2_metadata_cache_owner(ci), 5338c2ecf20Sopenharmony_ci index, ci->ci_num_cached); 5348c2ecf20Sopenharmony_ci 5358c2ecf20Sopenharmony_ci ci->ci_num_cached--; 5368c2ecf20Sopenharmony_ci 5378c2ecf20Sopenharmony_ci /* don't need to copy if the array is now empty, or if we 5388c2ecf20Sopenharmony_ci * removed at the tail */ 5398c2ecf20Sopenharmony_ci if (ci->ci_num_cached && index < ci->ci_num_cached) { 5408c2ecf20Sopenharmony_ci bytes = sizeof(sector_t) * (ci->ci_num_cached - index); 5418c2ecf20Sopenharmony_ci memmove(&array[index], &array[index + 1], bytes); 5428c2ecf20Sopenharmony_ci } 5438c2ecf20Sopenharmony_ci} 5448c2ecf20Sopenharmony_ci 5458c2ecf20Sopenharmony_ci/* Requires ip_lock. */ 5468c2ecf20Sopenharmony_cistatic void ocfs2_remove_metadata_tree(struct ocfs2_caching_info *ci, 5478c2ecf20Sopenharmony_ci struct ocfs2_meta_cache_item *item) 5488c2ecf20Sopenharmony_ci{ 5498c2ecf20Sopenharmony_ci trace_ocfs2_remove_metadata_tree( 5508c2ecf20Sopenharmony_ci (unsigned long long)ocfs2_metadata_cache_owner(ci), 5518c2ecf20Sopenharmony_ci (unsigned long long)item->c_block); 5528c2ecf20Sopenharmony_ci 5538c2ecf20Sopenharmony_ci rb_erase(&item->c_node, &ci->ci_cache.ci_tree); 5548c2ecf20Sopenharmony_ci ci->ci_num_cached--; 5558c2ecf20Sopenharmony_ci} 5568c2ecf20Sopenharmony_ci 5578c2ecf20Sopenharmony_cistatic void ocfs2_remove_block_from_cache(struct ocfs2_caching_info *ci, 5588c2ecf20Sopenharmony_ci sector_t block) 5598c2ecf20Sopenharmony_ci{ 5608c2ecf20Sopenharmony_ci int index; 5618c2ecf20Sopenharmony_ci struct ocfs2_meta_cache_item *item = NULL; 5628c2ecf20Sopenharmony_ci 5638c2ecf20Sopenharmony_ci ocfs2_metadata_cache_lock(ci); 5648c2ecf20Sopenharmony_ci trace_ocfs2_remove_block_from_cache( 5658c2ecf20Sopenharmony_ci (unsigned long long)ocfs2_metadata_cache_owner(ci), 5668c2ecf20Sopenharmony_ci (unsigned long long) block, ci->ci_num_cached, 5678c2ecf20Sopenharmony_ci ci->ci_flags); 5688c2ecf20Sopenharmony_ci 5698c2ecf20Sopenharmony_ci if (ci->ci_flags & OCFS2_CACHE_FL_INLINE) { 5708c2ecf20Sopenharmony_ci index = ocfs2_search_cache_array(ci, block); 5718c2ecf20Sopenharmony_ci if (index != -1) 5728c2ecf20Sopenharmony_ci ocfs2_remove_metadata_array(ci, index); 5738c2ecf20Sopenharmony_ci } else { 5748c2ecf20Sopenharmony_ci item = ocfs2_search_cache_tree(ci, block); 5758c2ecf20Sopenharmony_ci if (item) 5768c2ecf20Sopenharmony_ci ocfs2_remove_metadata_tree(ci, item); 5778c2ecf20Sopenharmony_ci } 5788c2ecf20Sopenharmony_ci ocfs2_metadata_cache_unlock(ci); 5798c2ecf20Sopenharmony_ci 5808c2ecf20Sopenharmony_ci if (item) 5818c2ecf20Sopenharmony_ci kmem_cache_free(ocfs2_uptodate_cachep, item); 5828c2ecf20Sopenharmony_ci} 5838c2ecf20Sopenharmony_ci 5848c2ecf20Sopenharmony_ci/* 5858c2ecf20Sopenharmony_ci * Called when we remove a chunk of metadata from an inode. We don't 5868c2ecf20Sopenharmony_ci * bother reverting things to an inlined array in the case of a remove 5878c2ecf20Sopenharmony_ci * which moves us back under the limit. 5888c2ecf20Sopenharmony_ci */ 5898c2ecf20Sopenharmony_civoid ocfs2_remove_from_cache(struct ocfs2_caching_info *ci, 5908c2ecf20Sopenharmony_ci struct buffer_head *bh) 5918c2ecf20Sopenharmony_ci{ 5928c2ecf20Sopenharmony_ci sector_t block = bh->b_blocknr; 5938c2ecf20Sopenharmony_ci 5948c2ecf20Sopenharmony_ci ocfs2_remove_block_from_cache(ci, block); 5958c2ecf20Sopenharmony_ci} 5968c2ecf20Sopenharmony_ci 5978c2ecf20Sopenharmony_ci/* Called when we remove xattr clusters from an inode. */ 5988c2ecf20Sopenharmony_civoid ocfs2_remove_xattr_clusters_from_cache(struct ocfs2_caching_info *ci, 5998c2ecf20Sopenharmony_ci sector_t block, 6008c2ecf20Sopenharmony_ci u32 c_len) 6018c2ecf20Sopenharmony_ci{ 6028c2ecf20Sopenharmony_ci struct super_block *sb = ocfs2_metadata_cache_get_super(ci); 6038c2ecf20Sopenharmony_ci unsigned int i, b_len = ocfs2_clusters_to_blocks(sb, 1) * c_len; 6048c2ecf20Sopenharmony_ci 6058c2ecf20Sopenharmony_ci for (i = 0; i < b_len; i++, block++) 6068c2ecf20Sopenharmony_ci ocfs2_remove_block_from_cache(ci, block); 6078c2ecf20Sopenharmony_ci} 6088c2ecf20Sopenharmony_ci 6098c2ecf20Sopenharmony_ciint __init init_ocfs2_uptodate_cache(void) 6108c2ecf20Sopenharmony_ci{ 6118c2ecf20Sopenharmony_ci ocfs2_uptodate_cachep = kmem_cache_create("ocfs2_uptodate", 6128c2ecf20Sopenharmony_ci sizeof(struct ocfs2_meta_cache_item), 6138c2ecf20Sopenharmony_ci 0, SLAB_HWCACHE_ALIGN, NULL); 6148c2ecf20Sopenharmony_ci if (!ocfs2_uptodate_cachep) 6158c2ecf20Sopenharmony_ci return -ENOMEM; 6168c2ecf20Sopenharmony_ci 6178c2ecf20Sopenharmony_ci return 0; 6188c2ecf20Sopenharmony_ci} 6198c2ecf20Sopenharmony_ci 6208c2ecf20Sopenharmony_civoid exit_ocfs2_uptodate_cache(void) 6218c2ecf20Sopenharmony_ci{ 6228c2ecf20Sopenharmony_ci kmem_cache_destroy(ocfs2_uptodate_cachep); 6238c2ecf20Sopenharmony_ci} 624