18c2ecf20Sopenharmony_ci// SPDX-License-Identifier: GPL-2.0-or-later 28c2ecf20Sopenharmony_ci/* -*- mode: c; c-basic-offset: 8; -*- 38c2ecf20Sopenharmony_ci * vim: noexpandtab sw=8 ts=8 sts=0: 48c2ecf20Sopenharmony_ci * 58c2ecf20Sopenharmony_ci * localalloc.c 68c2ecf20Sopenharmony_ci * 78c2ecf20Sopenharmony_ci * Node local data allocation 88c2ecf20Sopenharmony_ci * 98c2ecf20Sopenharmony_ci * Copyright (C) 2002, 2004 Oracle. All rights reserved. 108c2ecf20Sopenharmony_ci */ 118c2ecf20Sopenharmony_ci 128c2ecf20Sopenharmony_ci#include <linux/fs.h> 138c2ecf20Sopenharmony_ci#include <linux/types.h> 148c2ecf20Sopenharmony_ci#include <linux/slab.h> 158c2ecf20Sopenharmony_ci#include <linux/highmem.h> 168c2ecf20Sopenharmony_ci#include <linux/bitops.h> 178c2ecf20Sopenharmony_ci 188c2ecf20Sopenharmony_ci#include <cluster/masklog.h> 198c2ecf20Sopenharmony_ci 208c2ecf20Sopenharmony_ci#include "ocfs2.h" 218c2ecf20Sopenharmony_ci 228c2ecf20Sopenharmony_ci#include "alloc.h" 238c2ecf20Sopenharmony_ci#include "blockcheck.h" 248c2ecf20Sopenharmony_ci#include "dlmglue.h" 258c2ecf20Sopenharmony_ci#include "inode.h" 268c2ecf20Sopenharmony_ci#include "journal.h" 278c2ecf20Sopenharmony_ci#include "localalloc.h" 288c2ecf20Sopenharmony_ci#include "suballoc.h" 298c2ecf20Sopenharmony_ci#include "super.h" 308c2ecf20Sopenharmony_ci#include "sysfile.h" 318c2ecf20Sopenharmony_ci#include "ocfs2_trace.h" 328c2ecf20Sopenharmony_ci 338c2ecf20Sopenharmony_ci#include "buffer_head_io.h" 348c2ecf20Sopenharmony_ci 358c2ecf20Sopenharmony_ci#define OCFS2_LOCAL_ALLOC(dinode) (&((dinode)->id2.i_lab)) 368c2ecf20Sopenharmony_ci 378c2ecf20Sopenharmony_cistatic u32 ocfs2_local_alloc_count_bits(struct ocfs2_dinode *alloc); 388c2ecf20Sopenharmony_ci 398c2ecf20Sopenharmony_cistatic int ocfs2_local_alloc_find_clear_bits(struct ocfs2_super *osb, 408c2ecf20Sopenharmony_ci struct ocfs2_dinode *alloc, 418c2ecf20Sopenharmony_ci u32 *numbits, 428c2ecf20Sopenharmony_ci struct ocfs2_alloc_reservation *resv); 
438c2ecf20Sopenharmony_ci 448c2ecf20Sopenharmony_cistatic void ocfs2_clear_local_alloc(struct ocfs2_dinode *alloc); 458c2ecf20Sopenharmony_ci 468c2ecf20Sopenharmony_cistatic int ocfs2_sync_local_to_main(struct ocfs2_super *osb, 478c2ecf20Sopenharmony_ci handle_t *handle, 488c2ecf20Sopenharmony_ci struct ocfs2_dinode *alloc, 498c2ecf20Sopenharmony_ci struct inode *main_bm_inode, 508c2ecf20Sopenharmony_ci struct buffer_head *main_bm_bh); 518c2ecf20Sopenharmony_ci 528c2ecf20Sopenharmony_cistatic int ocfs2_local_alloc_reserve_for_window(struct ocfs2_super *osb, 538c2ecf20Sopenharmony_ci struct ocfs2_alloc_context **ac, 548c2ecf20Sopenharmony_ci struct inode **bitmap_inode, 558c2ecf20Sopenharmony_ci struct buffer_head **bitmap_bh); 568c2ecf20Sopenharmony_ci 578c2ecf20Sopenharmony_cistatic int ocfs2_local_alloc_new_window(struct ocfs2_super *osb, 588c2ecf20Sopenharmony_ci handle_t *handle, 598c2ecf20Sopenharmony_ci struct ocfs2_alloc_context *ac); 608c2ecf20Sopenharmony_ci 618c2ecf20Sopenharmony_cistatic int ocfs2_local_alloc_slide_window(struct ocfs2_super *osb, 628c2ecf20Sopenharmony_ci struct inode *local_alloc_inode); 638c2ecf20Sopenharmony_ci 648c2ecf20Sopenharmony_ci/* 658c2ecf20Sopenharmony_ci * ocfs2_la_default_mb() - determine a default size, in megabytes of 668c2ecf20Sopenharmony_ci * the local alloc. 678c2ecf20Sopenharmony_ci * 688c2ecf20Sopenharmony_ci * Generally, we'd like to pick as large a local alloc as 698c2ecf20Sopenharmony_ci * possible. Performance on large workloads tends to scale 708c2ecf20Sopenharmony_ci * proportionally to la size. In addition to that, the reservations 718c2ecf20Sopenharmony_ci * code functions more efficiently as it can reserve more windows for 728c2ecf20Sopenharmony_ci * write. 
738c2ecf20Sopenharmony_ci * 748c2ecf20Sopenharmony_ci * Some things work against us when trying to choose a large local alloc: 758c2ecf20Sopenharmony_ci * 768c2ecf20Sopenharmony_ci * - We need to ensure our sizing is picked to leave enough space in 778c2ecf20Sopenharmony_ci * group descriptors for other allocations (such as block groups, 788c2ecf20Sopenharmony_ci * etc). Picking default sizes which are a multiple of 4 could help 798c2ecf20Sopenharmony_ci * - block groups are allocated in 2mb and 4mb chunks. 808c2ecf20Sopenharmony_ci * 818c2ecf20Sopenharmony_ci * - Likewise, we don't want to starve other nodes of bits on small 828c2ecf20Sopenharmony_ci * file systems. This can easily be taken care of by limiting our 838c2ecf20Sopenharmony_ci * default to a reasonable size (256M) on larger cluster sizes. 848c2ecf20Sopenharmony_ci * 858c2ecf20Sopenharmony_ci * - Some file systems can't support very large sizes - 4k and 8k in 868c2ecf20Sopenharmony_ci * particular are limited to less than 128 and 256 megabytes respectively. 
878c2ecf20Sopenharmony_ci * 888c2ecf20Sopenharmony_ci * The following reference table shows group descriptor and local 898c2ecf20Sopenharmony_ci * alloc maximums at various cluster sizes (4k blocksize) 908c2ecf20Sopenharmony_ci * 918c2ecf20Sopenharmony_ci * csize: 4K group: 126M la: 121M 928c2ecf20Sopenharmony_ci * csize: 8K group: 252M la: 243M 938c2ecf20Sopenharmony_ci * csize: 16K group: 504M la: 486M 948c2ecf20Sopenharmony_ci * csize: 32K group: 1008M la: 972M 958c2ecf20Sopenharmony_ci * csize: 64K group: 2016M la: 1944M 968c2ecf20Sopenharmony_ci * csize: 128K group: 4032M la: 3888M 978c2ecf20Sopenharmony_ci * csize: 256K group: 8064M la: 7776M 988c2ecf20Sopenharmony_ci * csize: 512K group: 16128M la: 15552M 998c2ecf20Sopenharmony_ci * csize: 1024K group: 32256M la: 31104M 1008c2ecf20Sopenharmony_ci */ 1018c2ecf20Sopenharmony_ci#define OCFS2_LA_MAX_DEFAULT_MB 256 1028c2ecf20Sopenharmony_ci#define OCFS2_LA_OLD_DEFAULT 8 1038c2ecf20Sopenharmony_ciunsigned int ocfs2_la_default_mb(struct ocfs2_super *osb) 1048c2ecf20Sopenharmony_ci{ 1058c2ecf20Sopenharmony_ci unsigned int la_mb; 1068c2ecf20Sopenharmony_ci unsigned int gd_mb; 1078c2ecf20Sopenharmony_ci unsigned int la_max_mb; 1088c2ecf20Sopenharmony_ci unsigned int megs_per_slot; 1098c2ecf20Sopenharmony_ci struct super_block *sb = osb->sb; 1108c2ecf20Sopenharmony_ci 1118c2ecf20Sopenharmony_ci gd_mb = ocfs2_clusters_to_megabytes(osb->sb, 1128c2ecf20Sopenharmony_ci 8 * ocfs2_group_bitmap_size(sb, 0, osb->s_feature_incompat)); 1138c2ecf20Sopenharmony_ci 1148c2ecf20Sopenharmony_ci /* 1158c2ecf20Sopenharmony_ci * This takes care of files systems with very small group 1168c2ecf20Sopenharmony_ci * descriptors - 512 byte blocksize at cluster sizes lower 1178c2ecf20Sopenharmony_ci * than 16K and also 1k blocksize with 4k cluster size. 
1188c2ecf20Sopenharmony_ci */ 1198c2ecf20Sopenharmony_ci if ((sb->s_blocksize == 512 && osb->s_clustersize <= 8192) 1208c2ecf20Sopenharmony_ci || (sb->s_blocksize == 1024 && osb->s_clustersize == 4096)) 1218c2ecf20Sopenharmony_ci return OCFS2_LA_OLD_DEFAULT; 1228c2ecf20Sopenharmony_ci 1238c2ecf20Sopenharmony_ci /* 1248c2ecf20Sopenharmony_ci * Leave enough room for some block groups and make the final 1258c2ecf20Sopenharmony_ci * value we work from a multiple of 4. 1268c2ecf20Sopenharmony_ci */ 1278c2ecf20Sopenharmony_ci gd_mb -= 16; 1288c2ecf20Sopenharmony_ci gd_mb &= 0xFFFFFFFB; 1298c2ecf20Sopenharmony_ci 1308c2ecf20Sopenharmony_ci la_mb = gd_mb; 1318c2ecf20Sopenharmony_ci 1328c2ecf20Sopenharmony_ci /* 1338c2ecf20Sopenharmony_ci * Keep window sizes down to a reasonable default 1348c2ecf20Sopenharmony_ci */ 1358c2ecf20Sopenharmony_ci if (la_mb > OCFS2_LA_MAX_DEFAULT_MB) { 1368c2ecf20Sopenharmony_ci /* 1378c2ecf20Sopenharmony_ci * Some clustersize / blocksize combinations will have 1388c2ecf20Sopenharmony_ci * given us a larger than OCFS2_LA_MAX_DEFAULT_MB 1398c2ecf20Sopenharmony_ci * default size, but get poor distribution when 1408c2ecf20Sopenharmony_ci * limited to exactly 256 megabytes. 1418c2ecf20Sopenharmony_ci * 1428c2ecf20Sopenharmony_ci * As an example, 16K clustersize at 4K blocksize 1438c2ecf20Sopenharmony_ci * gives us a cluster group size of 504M. Paring the 1448c2ecf20Sopenharmony_ci * local alloc size down to 256 however, would give us 1458c2ecf20Sopenharmony_ci * only one window and around 200MB left in the 1468c2ecf20Sopenharmony_ci * cluster group. Instead, find the first size below 1478c2ecf20Sopenharmony_ci * 256 which would give us an even distribution. 
1488c2ecf20Sopenharmony_ci * 1498c2ecf20Sopenharmony_ci * Larger cluster group sizes actually work out pretty 1508c2ecf20Sopenharmony_ci * well when pared to 256, so we don't have to do this 1518c2ecf20Sopenharmony_ci * for any group that fits more than two 1528c2ecf20Sopenharmony_ci * OCFS2_LA_MAX_DEFAULT_MB windows. 1538c2ecf20Sopenharmony_ci */ 1548c2ecf20Sopenharmony_ci if (gd_mb > (2 * OCFS2_LA_MAX_DEFAULT_MB)) 1558c2ecf20Sopenharmony_ci la_mb = 256; 1568c2ecf20Sopenharmony_ci else { 1578c2ecf20Sopenharmony_ci unsigned int gd_mult = gd_mb; 1588c2ecf20Sopenharmony_ci 1598c2ecf20Sopenharmony_ci while (gd_mult > 256) 1608c2ecf20Sopenharmony_ci gd_mult = gd_mult >> 1; 1618c2ecf20Sopenharmony_ci 1628c2ecf20Sopenharmony_ci la_mb = gd_mult; 1638c2ecf20Sopenharmony_ci } 1648c2ecf20Sopenharmony_ci } 1658c2ecf20Sopenharmony_ci 1668c2ecf20Sopenharmony_ci megs_per_slot = osb->osb_clusters_at_boot / osb->max_slots; 1678c2ecf20Sopenharmony_ci megs_per_slot = ocfs2_clusters_to_megabytes(osb->sb, megs_per_slot); 1688c2ecf20Sopenharmony_ci /* Too many nodes, too few disk clusters. */ 1698c2ecf20Sopenharmony_ci if (megs_per_slot < la_mb) 1708c2ecf20Sopenharmony_ci la_mb = megs_per_slot; 1718c2ecf20Sopenharmony_ci 1728c2ecf20Sopenharmony_ci /* We can't store more bits than we can in a block. 
*/ 1738c2ecf20Sopenharmony_ci la_max_mb = ocfs2_clusters_to_megabytes(osb->sb, 1748c2ecf20Sopenharmony_ci ocfs2_local_alloc_size(sb) * 8); 1758c2ecf20Sopenharmony_ci if (la_mb > la_max_mb) 1768c2ecf20Sopenharmony_ci la_mb = la_max_mb; 1778c2ecf20Sopenharmony_ci 1788c2ecf20Sopenharmony_ci return la_mb; 1798c2ecf20Sopenharmony_ci} 1808c2ecf20Sopenharmony_ci 1818c2ecf20Sopenharmony_civoid ocfs2_la_set_sizes(struct ocfs2_super *osb, int requested_mb) 1828c2ecf20Sopenharmony_ci{ 1838c2ecf20Sopenharmony_ci struct super_block *sb = osb->sb; 1848c2ecf20Sopenharmony_ci unsigned int la_default_mb = ocfs2_la_default_mb(osb); 1858c2ecf20Sopenharmony_ci unsigned int la_max_mb; 1868c2ecf20Sopenharmony_ci 1878c2ecf20Sopenharmony_ci la_max_mb = ocfs2_clusters_to_megabytes(sb, 1888c2ecf20Sopenharmony_ci ocfs2_local_alloc_size(sb) * 8); 1898c2ecf20Sopenharmony_ci 1908c2ecf20Sopenharmony_ci trace_ocfs2_la_set_sizes(requested_mb, la_max_mb, la_default_mb); 1918c2ecf20Sopenharmony_ci 1928c2ecf20Sopenharmony_ci if (requested_mb == -1) { 1938c2ecf20Sopenharmony_ci /* No user request - use defaults */ 1948c2ecf20Sopenharmony_ci osb->local_alloc_default_bits = 1958c2ecf20Sopenharmony_ci ocfs2_megabytes_to_clusters(sb, la_default_mb); 1968c2ecf20Sopenharmony_ci } else if (requested_mb > la_max_mb) { 1978c2ecf20Sopenharmony_ci /* Request is too big, we give the maximum available */ 1988c2ecf20Sopenharmony_ci osb->local_alloc_default_bits = 1998c2ecf20Sopenharmony_ci ocfs2_megabytes_to_clusters(sb, la_max_mb); 2008c2ecf20Sopenharmony_ci } else { 2018c2ecf20Sopenharmony_ci osb->local_alloc_default_bits = 2028c2ecf20Sopenharmony_ci ocfs2_megabytes_to_clusters(sb, requested_mb); 2038c2ecf20Sopenharmony_ci } 2048c2ecf20Sopenharmony_ci 2058c2ecf20Sopenharmony_ci osb->local_alloc_bits = osb->local_alloc_default_bits; 2068c2ecf20Sopenharmony_ci} 2078c2ecf20Sopenharmony_ci 2088c2ecf20Sopenharmony_cistatic inline int ocfs2_la_state_enabled(struct ocfs2_super *osb) 2098c2ecf20Sopenharmony_ci{ 
2108c2ecf20Sopenharmony_ci return (osb->local_alloc_state == OCFS2_LA_THROTTLED || 2118c2ecf20Sopenharmony_ci osb->local_alloc_state == OCFS2_LA_ENABLED); 2128c2ecf20Sopenharmony_ci} 2138c2ecf20Sopenharmony_ci 2148c2ecf20Sopenharmony_civoid ocfs2_local_alloc_seen_free_bits(struct ocfs2_super *osb, 2158c2ecf20Sopenharmony_ci unsigned int num_clusters) 2168c2ecf20Sopenharmony_ci{ 2178c2ecf20Sopenharmony_ci spin_lock(&osb->osb_lock); 2188c2ecf20Sopenharmony_ci if (osb->local_alloc_state == OCFS2_LA_DISABLED || 2198c2ecf20Sopenharmony_ci osb->local_alloc_state == OCFS2_LA_THROTTLED) 2208c2ecf20Sopenharmony_ci if (num_clusters >= osb->local_alloc_default_bits) { 2218c2ecf20Sopenharmony_ci cancel_delayed_work(&osb->la_enable_wq); 2228c2ecf20Sopenharmony_ci osb->local_alloc_state = OCFS2_LA_ENABLED; 2238c2ecf20Sopenharmony_ci } 2248c2ecf20Sopenharmony_ci spin_unlock(&osb->osb_lock); 2258c2ecf20Sopenharmony_ci} 2268c2ecf20Sopenharmony_ci 2278c2ecf20Sopenharmony_civoid ocfs2_la_enable_worker(struct work_struct *work) 2288c2ecf20Sopenharmony_ci{ 2298c2ecf20Sopenharmony_ci struct ocfs2_super *osb = 2308c2ecf20Sopenharmony_ci container_of(work, struct ocfs2_super, 2318c2ecf20Sopenharmony_ci la_enable_wq.work); 2328c2ecf20Sopenharmony_ci spin_lock(&osb->osb_lock); 2338c2ecf20Sopenharmony_ci osb->local_alloc_state = OCFS2_LA_ENABLED; 2348c2ecf20Sopenharmony_ci spin_unlock(&osb->osb_lock); 2358c2ecf20Sopenharmony_ci} 2368c2ecf20Sopenharmony_ci 2378c2ecf20Sopenharmony_ci/* 2388c2ecf20Sopenharmony_ci * Tell us whether a given allocation should use the local alloc 2398c2ecf20Sopenharmony_ci * file. Otherwise, it has to go to the main bitmap. 2408c2ecf20Sopenharmony_ci * 2418c2ecf20Sopenharmony_ci * This function does semi-dirty reads of local alloc size and state! 2428c2ecf20Sopenharmony_ci * This is ok however, as the values are re-checked once under mutex. 
2438c2ecf20Sopenharmony_ci */ 2448c2ecf20Sopenharmony_ciint ocfs2_alloc_should_use_local(struct ocfs2_super *osb, u64 bits) 2458c2ecf20Sopenharmony_ci{ 2468c2ecf20Sopenharmony_ci int ret = 0; 2478c2ecf20Sopenharmony_ci int la_bits; 2488c2ecf20Sopenharmony_ci 2498c2ecf20Sopenharmony_ci spin_lock(&osb->osb_lock); 2508c2ecf20Sopenharmony_ci la_bits = osb->local_alloc_bits; 2518c2ecf20Sopenharmony_ci 2528c2ecf20Sopenharmony_ci if (!ocfs2_la_state_enabled(osb)) 2538c2ecf20Sopenharmony_ci goto bail; 2548c2ecf20Sopenharmony_ci 2558c2ecf20Sopenharmony_ci /* la_bits should be at least twice the size (in clusters) of 2568c2ecf20Sopenharmony_ci * a new block group. We want to be sure block group 2578c2ecf20Sopenharmony_ci * allocations go through the local alloc, so allow an 2588c2ecf20Sopenharmony_ci * allocation to take up to half the bitmap. */ 2598c2ecf20Sopenharmony_ci if (bits > (la_bits / 2)) 2608c2ecf20Sopenharmony_ci goto bail; 2618c2ecf20Sopenharmony_ci 2628c2ecf20Sopenharmony_ci ret = 1; 2638c2ecf20Sopenharmony_cibail: 2648c2ecf20Sopenharmony_ci trace_ocfs2_alloc_should_use_local( 2658c2ecf20Sopenharmony_ci (unsigned long long)bits, osb->local_alloc_state, la_bits, ret); 2668c2ecf20Sopenharmony_ci spin_unlock(&osb->osb_lock); 2678c2ecf20Sopenharmony_ci return ret; 2688c2ecf20Sopenharmony_ci} 2698c2ecf20Sopenharmony_ci 2708c2ecf20Sopenharmony_ciint ocfs2_load_local_alloc(struct ocfs2_super *osb) 2718c2ecf20Sopenharmony_ci{ 2728c2ecf20Sopenharmony_ci int status = 0; 2738c2ecf20Sopenharmony_ci struct ocfs2_dinode *alloc = NULL; 2748c2ecf20Sopenharmony_ci struct buffer_head *alloc_bh = NULL; 2758c2ecf20Sopenharmony_ci u32 num_used; 2768c2ecf20Sopenharmony_ci struct inode *inode = NULL; 2778c2ecf20Sopenharmony_ci struct ocfs2_local_alloc *la; 2788c2ecf20Sopenharmony_ci 2798c2ecf20Sopenharmony_ci if (osb->local_alloc_bits == 0) 2808c2ecf20Sopenharmony_ci goto bail; 2818c2ecf20Sopenharmony_ci 2828c2ecf20Sopenharmony_ci if (osb->local_alloc_bits >= osb->bitmap_cpg) { 
2838c2ecf20Sopenharmony_ci mlog(ML_NOTICE, "Requested local alloc window %d is larger " 2848c2ecf20Sopenharmony_ci "than max possible %u. Using defaults.\n", 2858c2ecf20Sopenharmony_ci osb->local_alloc_bits, (osb->bitmap_cpg - 1)); 2868c2ecf20Sopenharmony_ci osb->local_alloc_bits = 2878c2ecf20Sopenharmony_ci ocfs2_megabytes_to_clusters(osb->sb, 2888c2ecf20Sopenharmony_ci ocfs2_la_default_mb(osb)); 2898c2ecf20Sopenharmony_ci } 2908c2ecf20Sopenharmony_ci 2918c2ecf20Sopenharmony_ci /* read the alloc off disk */ 2928c2ecf20Sopenharmony_ci inode = ocfs2_get_system_file_inode(osb, LOCAL_ALLOC_SYSTEM_INODE, 2938c2ecf20Sopenharmony_ci osb->slot_num); 2948c2ecf20Sopenharmony_ci if (!inode) { 2958c2ecf20Sopenharmony_ci status = -EINVAL; 2968c2ecf20Sopenharmony_ci mlog_errno(status); 2978c2ecf20Sopenharmony_ci goto bail; 2988c2ecf20Sopenharmony_ci } 2998c2ecf20Sopenharmony_ci 3008c2ecf20Sopenharmony_ci status = ocfs2_read_inode_block_full(inode, &alloc_bh, 3018c2ecf20Sopenharmony_ci OCFS2_BH_IGNORE_CACHE); 3028c2ecf20Sopenharmony_ci if (status < 0) { 3038c2ecf20Sopenharmony_ci mlog_errno(status); 3048c2ecf20Sopenharmony_ci goto bail; 3058c2ecf20Sopenharmony_ci } 3068c2ecf20Sopenharmony_ci 3078c2ecf20Sopenharmony_ci alloc = (struct ocfs2_dinode *) alloc_bh->b_data; 3088c2ecf20Sopenharmony_ci la = OCFS2_LOCAL_ALLOC(alloc); 3098c2ecf20Sopenharmony_ci 3108c2ecf20Sopenharmony_ci if (!(le32_to_cpu(alloc->i_flags) & 3118c2ecf20Sopenharmony_ci (OCFS2_LOCAL_ALLOC_FL|OCFS2_BITMAP_FL))) { 3128c2ecf20Sopenharmony_ci mlog(ML_ERROR, "Invalid local alloc inode, %llu\n", 3138c2ecf20Sopenharmony_ci (unsigned long long)OCFS2_I(inode)->ip_blkno); 3148c2ecf20Sopenharmony_ci status = -EINVAL; 3158c2ecf20Sopenharmony_ci goto bail; 3168c2ecf20Sopenharmony_ci } 3178c2ecf20Sopenharmony_ci 3188c2ecf20Sopenharmony_ci if ((la->la_size == 0) || 3198c2ecf20Sopenharmony_ci (le16_to_cpu(la->la_size) > ocfs2_local_alloc_size(inode->i_sb))) { 3208c2ecf20Sopenharmony_ci mlog(ML_ERROR, "Local alloc size is 
invalid (la_size = %u)\n", 3218c2ecf20Sopenharmony_ci le16_to_cpu(la->la_size)); 3228c2ecf20Sopenharmony_ci status = -EINVAL; 3238c2ecf20Sopenharmony_ci goto bail; 3248c2ecf20Sopenharmony_ci } 3258c2ecf20Sopenharmony_ci 3268c2ecf20Sopenharmony_ci /* do a little verification. */ 3278c2ecf20Sopenharmony_ci num_used = ocfs2_local_alloc_count_bits(alloc); 3288c2ecf20Sopenharmony_ci 3298c2ecf20Sopenharmony_ci /* hopefully the local alloc has always been recovered before 3308c2ecf20Sopenharmony_ci * we load it. */ 3318c2ecf20Sopenharmony_ci if (num_used 3328c2ecf20Sopenharmony_ci || alloc->id1.bitmap1.i_used 3338c2ecf20Sopenharmony_ci || alloc->id1.bitmap1.i_total 3348c2ecf20Sopenharmony_ci || la->la_bm_off) { 3358c2ecf20Sopenharmony_ci mlog(ML_ERROR, "inconsistent detected, clean journal with" 3368c2ecf20Sopenharmony_ci " unrecovered local alloc, please run fsck.ocfs2!\n" 3378c2ecf20Sopenharmony_ci "found = %u, set = %u, taken = %u, off = %u\n", 3388c2ecf20Sopenharmony_ci num_used, le32_to_cpu(alloc->id1.bitmap1.i_used), 3398c2ecf20Sopenharmony_ci le32_to_cpu(alloc->id1.bitmap1.i_total), 3408c2ecf20Sopenharmony_ci OCFS2_LOCAL_ALLOC(alloc)->la_bm_off); 3418c2ecf20Sopenharmony_ci 3428c2ecf20Sopenharmony_ci status = -EINVAL; 3438c2ecf20Sopenharmony_ci goto bail; 3448c2ecf20Sopenharmony_ci } 3458c2ecf20Sopenharmony_ci 3468c2ecf20Sopenharmony_ci osb->local_alloc_bh = alloc_bh; 3478c2ecf20Sopenharmony_ci osb->local_alloc_state = OCFS2_LA_ENABLED; 3488c2ecf20Sopenharmony_ci 3498c2ecf20Sopenharmony_cibail: 3508c2ecf20Sopenharmony_ci if (status < 0) 3518c2ecf20Sopenharmony_ci brelse(alloc_bh); 3528c2ecf20Sopenharmony_ci iput(inode); 3538c2ecf20Sopenharmony_ci 3548c2ecf20Sopenharmony_ci trace_ocfs2_load_local_alloc(osb->local_alloc_bits); 3558c2ecf20Sopenharmony_ci 3568c2ecf20Sopenharmony_ci if (status) 3578c2ecf20Sopenharmony_ci mlog_errno(status); 3588c2ecf20Sopenharmony_ci return status; 3598c2ecf20Sopenharmony_ci} 3608c2ecf20Sopenharmony_ci 3618c2ecf20Sopenharmony_ci/* 
3628c2ecf20Sopenharmony_ci * return any unused bits to the bitmap and write out a clean 3638c2ecf20Sopenharmony_ci * local_alloc. 3648c2ecf20Sopenharmony_ci * 3658c2ecf20Sopenharmony_ci * local_alloc_bh is optional. If not passed, we will simply use the 3668c2ecf20Sopenharmony_ci * one off osb. If you do pass it however, be warned that it *will* be 3678c2ecf20Sopenharmony_ci * returned brelse'd and NULL'd out.*/ 3688c2ecf20Sopenharmony_civoid ocfs2_shutdown_local_alloc(struct ocfs2_super *osb) 3698c2ecf20Sopenharmony_ci{ 3708c2ecf20Sopenharmony_ci int status; 3718c2ecf20Sopenharmony_ci handle_t *handle; 3728c2ecf20Sopenharmony_ci struct inode *local_alloc_inode = NULL; 3738c2ecf20Sopenharmony_ci struct buffer_head *bh = NULL; 3748c2ecf20Sopenharmony_ci struct buffer_head *main_bm_bh = NULL; 3758c2ecf20Sopenharmony_ci struct inode *main_bm_inode = NULL; 3768c2ecf20Sopenharmony_ci struct ocfs2_dinode *alloc_copy = NULL; 3778c2ecf20Sopenharmony_ci struct ocfs2_dinode *alloc = NULL; 3788c2ecf20Sopenharmony_ci 3798c2ecf20Sopenharmony_ci cancel_delayed_work(&osb->la_enable_wq); 3808c2ecf20Sopenharmony_ci if (osb->ocfs2_wq) 3818c2ecf20Sopenharmony_ci flush_workqueue(osb->ocfs2_wq); 3828c2ecf20Sopenharmony_ci 3838c2ecf20Sopenharmony_ci if (osb->local_alloc_state == OCFS2_LA_UNUSED) 3848c2ecf20Sopenharmony_ci goto out; 3858c2ecf20Sopenharmony_ci 3868c2ecf20Sopenharmony_ci local_alloc_inode = 3878c2ecf20Sopenharmony_ci ocfs2_get_system_file_inode(osb, 3888c2ecf20Sopenharmony_ci LOCAL_ALLOC_SYSTEM_INODE, 3898c2ecf20Sopenharmony_ci osb->slot_num); 3908c2ecf20Sopenharmony_ci if (!local_alloc_inode) { 3918c2ecf20Sopenharmony_ci status = -ENOENT; 3928c2ecf20Sopenharmony_ci mlog_errno(status); 3938c2ecf20Sopenharmony_ci goto out; 3948c2ecf20Sopenharmony_ci } 3958c2ecf20Sopenharmony_ci 3968c2ecf20Sopenharmony_ci osb->local_alloc_state = OCFS2_LA_DISABLED; 3978c2ecf20Sopenharmony_ci 3988c2ecf20Sopenharmony_ci ocfs2_resmap_uninit(&osb->osb_la_resmap); 3998c2ecf20Sopenharmony_ci 
4008c2ecf20Sopenharmony_ci main_bm_inode = ocfs2_get_system_file_inode(osb, 4018c2ecf20Sopenharmony_ci GLOBAL_BITMAP_SYSTEM_INODE, 4028c2ecf20Sopenharmony_ci OCFS2_INVALID_SLOT); 4038c2ecf20Sopenharmony_ci if (!main_bm_inode) { 4048c2ecf20Sopenharmony_ci status = -EINVAL; 4058c2ecf20Sopenharmony_ci mlog_errno(status); 4068c2ecf20Sopenharmony_ci goto out; 4078c2ecf20Sopenharmony_ci } 4088c2ecf20Sopenharmony_ci 4098c2ecf20Sopenharmony_ci inode_lock(main_bm_inode); 4108c2ecf20Sopenharmony_ci 4118c2ecf20Sopenharmony_ci status = ocfs2_inode_lock(main_bm_inode, &main_bm_bh, 1); 4128c2ecf20Sopenharmony_ci if (status < 0) { 4138c2ecf20Sopenharmony_ci mlog_errno(status); 4148c2ecf20Sopenharmony_ci goto out_mutex; 4158c2ecf20Sopenharmony_ci } 4168c2ecf20Sopenharmony_ci 4178c2ecf20Sopenharmony_ci /* WINDOW_MOVE_CREDITS is a bit heavy... */ 4188c2ecf20Sopenharmony_ci handle = ocfs2_start_trans(osb, OCFS2_WINDOW_MOVE_CREDITS); 4198c2ecf20Sopenharmony_ci if (IS_ERR(handle)) { 4208c2ecf20Sopenharmony_ci mlog_errno(PTR_ERR(handle)); 4218c2ecf20Sopenharmony_ci handle = NULL; 4228c2ecf20Sopenharmony_ci goto out_unlock; 4238c2ecf20Sopenharmony_ci } 4248c2ecf20Sopenharmony_ci 4258c2ecf20Sopenharmony_ci bh = osb->local_alloc_bh; 4268c2ecf20Sopenharmony_ci alloc = (struct ocfs2_dinode *) bh->b_data; 4278c2ecf20Sopenharmony_ci 4288c2ecf20Sopenharmony_ci alloc_copy = kmemdup(alloc, bh->b_size, GFP_NOFS); 4298c2ecf20Sopenharmony_ci if (!alloc_copy) { 4308c2ecf20Sopenharmony_ci status = -ENOMEM; 4318c2ecf20Sopenharmony_ci goto out_commit; 4328c2ecf20Sopenharmony_ci } 4338c2ecf20Sopenharmony_ci 4348c2ecf20Sopenharmony_ci status = ocfs2_journal_access_di(handle, INODE_CACHE(local_alloc_inode), 4358c2ecf20Sopenharmony_ci bh, OCFS2_JOURNAL_ACCESS_WRITE); 4368c2ecf20Sopenharmony_ci if (status < 0) { 4378c2ecf20Sopenharmony_ci mlog_errno(status); 4388c2ecf20Sopenharmony_ci goto out_commit; 4398c2ecf20Sopenharmony_ci } 4408c2ecf20Sopenharmony_ci 4418c2ecf20Sopenharmony_ci 
ocfs2_clear_local_alloc(alloc); 4428c2ecf20Sopenharmony_ci ocfs2_journal_dirty(handle, bh); 4438c2ecf20Sopenharmony_ci 4448c2ecf20Sopenharmony_ci brelse(bh); 4458c2ecf20Sopenharmony_ci osb->local_alloc_bh = NULL; 4468c2ecf20Sopenharmony_ci osb->local_alloc_state = OCFS2_LA_UNUSED; 4478c2ecf20Sopenharmony_ci 4488c2ecf20Sopenharmony_ci status = ocfs2_sync_local_to_main(osb, handle, alloc_copy, 4498c2ecf20Sopenharmony_ci main_bm_inode, main_bm_bh); 4508c2ecf20Sopenharmony_ci if (status < 0) 4518c2ecf20Sopenharmony_ci mlog_errno(status); 4528c2ecf20Sopenharmony_ci 4538c2ecf20Sopenharmony_ciout_commit: 4548c2ecf20Sopenharmony_ci ocfs2_commit_trans(osb, handle); 4558c2ecf20Sopenharmony_ci 4568c2ecf20Sopenharmony_ciout_unlock: 4578c2ecf20Sopenharmony_ci brelse(main_bm_bh); 4588c2ecf20Sopenharmony_ci 4598c2ecf20Sopenharmony_ci ocfs2_inode_unlock(main_bm_inode, 1); 4608c2ecf20Sopenharmony_ci 4618c2ecf20Sopenharmony_ciout_mutex: 4628c2ecf20Sopenharmony_ci inode_unlock(main_bm_inode); 4638c2ecf20Sopenharmony_ci iput(main_bm_inode); 4648c2ecf20Sopenharmony_ci 4658c2ecf20Sopenharmony_ciout: 4668c2ecf20Sopenharmony_ci iput(local_alloc_inode); 4678c2ecf20Sopenharmony_ci 4688c2ecf20Sopenharmony_ci kfree(alloc_copy); 4698c2ecf20Sopenharmony_ci} 4708c2ecf20Sopenharmony_ci 4718c2ecf20Sopenharmony_ci/* 4728c2ecf20Sopenharmony_ci * We want to free the bitmap bits outside of any recovery context as 4738c2ecf20Sopenharmony_ci * we'll need a cluster lock to do so, but we must clear the local 4748c2ecf20Sopenharmony_ci * alloc before giving up the recovered nodes journal. 
To solve this, 4758c2ecf20Sopenharmony_ci * we kmalloc a copy of the local alloc before it's change for the 4768c2ecf20Sopenharmony_ci * caller to process with ocfs2_complete_local_alloc_recovery 4778c2ecf20Sopenharmony_ci */ 4788c2ecf20Sopenharmony_ciint ocfs2_begin_local_alloc_recovery(struct ocfs2_super *osb, 4798c2ecf20Sopenharmony_ci int slot_num, 4808c2ecf20Sopenharmony_ci struct ocfs2_dinode **alloc_copy) 4818c2ecf20Sopenharmony_ci{ 4828c2ecf20Sopenharmony_ci int status = 0; 4838c2ecf20Sopenharmony_ci struct buffer_head *alloc_bh = NULL; 4848c2ecf20Sopenharmony_ci struct inode *inode = NULL; 4858c2ecf20Sopenharmony_ci struct ocfs2_dinode *alloc; 4868c2ecf20Sopenharmony_ci 4878c2ecf20Sopenharmony_ci trace_ocfs2_begin_local_alloc_recovery(slot_num); 4888c2ecf20Sopenharmony_ci 4898c2ecf20Sopenharmony_ci *alloc_copy = NULL; 4908c2ecf20Sopenharmony_ci 4918c2ecf20Sopenharmony_ci inode = ocfs2_get_system_file_inode(osb, 4928c2ecf20Sopenharmony_ci LOCAL_ALLOC_SYSTEM_INODE, 4938c2ecf20Sopenharmony_ci slot_num); 4948c2ecf20Sopenharmony_ci if (!inode) { 4958c2ecf20Sopenharmony_ci status = -EINVAL; 4968c2ecf20Sopenharmony_ci mlog_errno(status); 4978c2ecf20Sopenharmony_ci goto bail; 4988c2ecf20Sopenharmony_ci } 4998c2ecf20Sopenharmony_ci 5008c2ecf20Sopenharmony_ci inode_lock(inode); 5018c2ecf20Sopenharmony_ci 5028c2ecf20Sopenharmony_ci status = ocfs2_read_inode_block_full(inode, &alloc_bh, 5038c2ecf20Sopenharmony_ci OCFS2_BH_IGNORE_CACHE); 5048c2ecf20Sopenharmony_ci if (status < 0) { 5058c2ecf20Sopenharmony_ci mlog_errno(status); 5068c2ecf20Sopenharmony_ci goto bail; 5078c2ecf20Sopenharmony_ci } 5088c2ecf20Sopenharmony_ci 5098c2ecf20Sopenharmony_ci *alloc_copy = kmalloc(alloc_bh->b_size, GFP_KERNEL); 5108c2ecf20Sopenharmony_ci if (!(*alloc_copy)) { 5118c2ecf20Sopenharmony_ci status = -ENOMEM; 5128c2ecf20Sopenharmony_ci goto bail; 5138c2ecf20Sopenharmony_ci } 5148c2ecf20Sopenharmony_ci memcpy((*alloc_copy), alloc_bh->b_data, alloc_bh->b_size); 5158c2ecf20Sopenharmony_ci 
5168c2ecf20Sopenharmony_ci alloc = (struct ocfs2_dinode *) alloc_bh->b_data; 5178c2ecf20Sopenharmony_ci ocfs2_clear_local_alloc(alloc); 5188c2ecf20Sopenharmony_ci 5198c2ecf20Sopenharmony_ci ocfs2_compute_meta_ecc(osb->sb, alloc_bh->b_data, &alloc->i_check); 5208c2ecf20Sopenharmony_ci status = ocfs2_write_block(osb, alloc_bh, INODE_CACHE(inode)); 5218c2ecf20Sopenharmony_ci if (status < 0) 5228c2ecf20Sopenharmony_ci mlog_errno(status); 5238c2ecf20Sopenharmony_ci 5248c2ecf20Sopenharmony_cibail: 5258c2ecf20Sopenharmony_ci if (status < 0) { 5268c2ecf20Sopenharmony_ci kfree(*alloc_copy); 5278c2ecf20Sopenharmony_ci *alloc_copy = NULL; 5288c2ecf20Sopenharmony_ci } 5298c2ecf20Sopenharmony_ci 5308c2ecf20Sopenharmony_ci brelse(alloc_bh); 5318c2ecf20Sopenharmony_ci 5328c2ecf20Sopenharmony_ci if (inode) { 5338c2ecf20Sopenharmony_ci inode_unlock(inode); 5348c2ecf20Sopenharmony_ci iput(inode); 5358c2ecf20Sopenharmony_ci } 5368c2ecf20Sopenharmony_ci 5378c2ecf20Sopenharmony_ci if (status) 5388c2ecf20Sopenharmony_ci mlog_errno(status); 5398c2ecf20Sopenharmony_ci return status; 5408c2ecf20Sopenharmony_ci} 5418c2ecf20Sopenharmony_ci 5428c2ecf20Sopenharmony_ci/* 5438c2ecf20Sopenharmony_ci * Step 2: By now, we've completed the journal recovery, we've stamped 5448c2ecf20Sopenharmony_ci * a clean local alloc on disk and dropped the node out of the 5458c2ecf20Sopenharmony_ci * recovery map. Dlm locks will no longer stall, so lets clear out the 5468c2ecf20Sopenharmony_ci * main bitmap. 
5478c2ecf20Sopenharmony_ci */ 5488c2ecf20Sopenharmony_ciint ocfs2_complete_local_alloc_recovery(struct ocfs2_super *osb, 5498c2ecf20Sopenharmony_ci struct ocfs2_dinode *alloc) 5508c2ecf20Sopenharmony_ci{ 5518c2ecf20Sopenharmony_ci int status; 5528c2ecf20Sopenharmony_ci handle_t *handle; 5538c2ecf20Sopenharmony_ci struct buffer_head *main_bm_bh = NULL; 5548c2ecf20Sopenharmony_ci struct inode *main_bm_inode; 5558c2ecf20Sopenharmony_ci 5568c2ecf20Sopenharmony_ci main_bm_inode = ocfs2_get_system_file_inode(osb, 5578c2ecf20Sopenharmony_ci GLOBAL_BITMAP_SYSTEM_INODE, 5588c2ecf20Sopenharmony_ci OCFS2_INVALID_SLOT); 5598c2ecf20Sopenharmony_ci if (!main_bm_inode) { 5608c2ecf20Sopenharmony_ci status = -EINVAL; 5618c2ecf20Sopenharmony_ci mlog_errno(status); 5628c2ecf20Sopenharmony_ci goto out; 5638c2ecf20Sopenharmony_ci } 5648c2ecf20Sopenharmony_ci 5658c2ecf20Sopenharmony_ci inode_lock(main_bm_inode); 5668c2ecf20Sopenharmony_ci 5678c2ecf20Sopenharmony_ci status = ocfs2_inode_lock(main_bm_inode, &main_bm_bh, 1); 5688c2ecf20Sopenharmony_ci if (status < 0) { 5698c2ecf20Sopenharmony_ci mlog_errno(status); 5708c2ecf20Sopenharmony_ci goto out_mutex; 5718c2ecf20Sopenharmony_ci } 5728c2ecf20Sopenharmony_ci 5738c2ecf20Sopenharmony_ci handle = ocfs2_start_trans(osb, OCFS2_WINDOW_MOVE_CREDITS); 5748c2ecf20Sopenharmony_ci if (IS_ERR(handle)) { 5758c2ecf20Sopenharmony_ci status = PTR_ERR(handle); 5768c2ecf20Sopenharmony_ci handle = NULL; 5778c2ecf20Sopenharmony_ci mlog_errno(status); 5788c2ecf20Sopenharmony_ci goto out_unlock; 5798c2ecf20Sopenharmony_ci } 5808c2ecf20Sopenharmony_ci 5818c2ecf20Sopenharmony_ci /* we want the bitmap change to be recorded on disk asap */ 5828c2ecf20Sopenharmony_ci handle->h_sync = 1; 5838c2ecf20Sopenharmony_ci 5848c2ecf20Sopenharmony_ci status = ocfs2_sync_local_to_main(osb, handle, alloc, 5858c2ecf20Sopenharmony_ci main_bm_inode, main_bm_bh); 5868c2ecf20Sopenharmony_ci if (status < 0) 5878c2ecf20Sopenharmony_ci mlog_errno(status); 5888c2ecf20Sopenharmony_ci 
5898c2ecf20Sopenharmony_ci ocfs2_commit_trans(osb, handle); 5908c2ecf20Sopenharmony_ci 5918c2ecf20Sopenharmony_ciout_unlock: 5928c2ecf20Sopenharmony_ci ocfs2_inode_unlock(main_bm_inode, 1); 5938c2ecf20Sopenharmony_ci 5948c2ecf20Sopenharmony_ciout_mutex: 5958c2ecf20Sopenharmony_ci inode_unlock(main_bm_inode); 5968c2ecf20Sopenharmony_ci 5978c2ecf20Sopenharmony_ci brelse(main_bm_bh); 5988c2ecf20Sopenharmony_ci 5998c2ecf20Sopenharmony_ci iput(main_bm_inode); 6008c2ecf20Sopenharmony_ci 6018c2ecf20Sopenharmony_ciout: 6028c2ecf20Sopenharmony_ci if (!status) 6038c2ecf20Sopenharmony_ci ocfs2_init_steal_slots(osb); 6048c2ecf20Sopenharmony_ci if (status) 6058c2ecf20Sopenharmony_ci mlog_errno(status); 6068c2ecf20Sopenharmony_ci return status; 6078c2ecf20Sopenharmony_ci} 6088c2ecf20Sopenharmony_ci 6098c2ecf20Sopenharmony_ci/* 6108c2ecf20Sopenharmony_ci * make sure we've got at least bits_wanted contiguous bits in the 6118c2ecf20Sopenharmony_ci * local alloc. You lose them when you drop i_mutex. 6128c2ecf20Sopenharmony_ci * 6138c2ecf20Sopenharmony_ci * We will add ourselves to the transaction passed in, but may start 6148c2ecf20Sopenharmony_ci * our own in order to shift windows. 
6158c2ecf20Sopenharmony_ci */ 6168c2ecf20Sopenharmony_ciint ocfs2_reserve_local_alloc_bits(struct ocfs2_super *osb, 6178c2ecf20Sopenharmony_ci u32 bits_wanted, 6188c2ecf20Sopenharmony_ci struct ocfs2_alloc_context *ac) 6198c2ecf20Sopenharmony_ci{ 6208c2ecf20Sopenharmony_ci int status; 6218c2ecf20Sopenharmony_ci struct ocfs2_dinode *alloc; 6228c2ecf20Sopenharmony_ci struct inode *local_alloc_inode; 6238c2ecf20Sopenharmony_ci unsigned int free_bits; 6248c2ecf20Sopenharmony_ci 6258c2ecf20Sopenharmony_ci BUG_ON(!ac); 6268c2ecf20Sopenharmony_ci 6278c2ecf20Sopenharmony_ci local_alloc_inode = 6288c2ecf20Sopenharmony_ci ocfs2_get_system_file_inode(osb, 6298c2ecf20Sopenharmony_ci LOCAL_ALLOC_SYSTEM_INODE, 6308c2ecf20Sopenharmony_ci osb->slot_num); 6318c2ecf20Sopenharmony_ci if (!local_alloc_inode) { 6328c2ecf20Sopenharmony_ci status = -ENOENT; 6338c2ecf20Sopenharmony_ci mlog_errno(status); 6348c2ecf20Sopenharmony_ci goto bail; 6358c2ecf20Sopenharmony_ci } 6368c2ecf20Sopenharmony_ci 6378c2ecf20Sopenharmony_ci inode_lock(local_alloc_inode); 6388c2ecf20Sopenharmony_ci 6398c2ecf20Sopenharmony_ci /* 6408c2ecf20Sopenharmony_ci * We must double check state and allocator bits because 6418c2ecf20Sopenharmony_ci * another process may have changed them while holding i_mutex. 
6428c2ecf20Sopenharmony_ci */ 6438c2ecf20Sopenharmony_ci spin_lock(&osb->osb_lock); 6448c2ecf20Sopenharmony_ci if (!ocfs2_la_state_enabled(osb) || 6458c2ecf20Sopenharmony_ci (bits_wanted > osb->local_alloc_bits)) { 6468c2ecf20Sopenharmony_ci spin_unlock(&osb->osb_lock); 6478c2ecf20Sopenharmony_ci status = -ENOSPC; 6488c2ecf20Sopenharmony_ci goto bail; 6498c2ecf20Sopenharmony_ci } 6508c2ecf20Sopenharmony_ci spin_unlock(&osb->osb_lock); 6518c2ecf20Sopenharmony_ci 6528c2ecf20Sopenharmony_ci alloc = (struct ocfs2_dinode *) osb->local_alloc_bh->b_data; 6538c2ecf20Sopenharmony_ci 6548c2ecf20Sopenharmony_ci#ifdef CONFIG_OCFS2_DEBUG_FS 6558c2ecf20Sopenharmony_ci if (le32_to_cpu(alloc->id1.bitmap1.i_used) != 6568c2ecf20Sopenharmony_ci ocfs2_local_alloc_count_bits(alloc)) { 6578c2ecf20Sopenharmony_ci status = ocfs2_error(osb->sb, "local alloc inode %llu says it has %u used bits, but a count shows %u\n", 6588c2ecf20Sopenharmony_ci (unsigned long long)le64_to_cpu(alloc->i_blkno), 6598c2ecf20Sopenharmony_ci le32_to_cpu(alloc->id1.bitmap1.i_used), 6608c2ecf20Sopenharmony_ci ocfs2_local_alloc_count_bits(alloc)); 6618c2ecf20Sopenharmony_ci goto bail; 6628c2ecf20Sopenharmony_ci } 6638c2ecf20Sopenharmony_ci#endif 6648c2ecf20Sopenharmony_ci 6658c2ecf20Sopenharmony_ci free_bits = le32_to_cpu(alloc->id1.bitmap1.i_total) - 6668c2ecf20Sopenharmony_ci le32_to_cpu(alloc->id1.bitmap1.i_used); 6678c2ecf20Sopenharmony_ci if (bits_wanted > free_bits) { 6688c2ecf20Sopenharmony_ci /* uhoh, window change time. 
*/ 6698c2ecf20Sopenharmony_ci status = 6708c2ecf20Sopenharmony_ci ocfs2_local_alloc_slide_window(osb, local_alloc_inode); 6718c2ecf20Sopenharmony_ci if (status < 0) { 6728c2ecf20Sopenharmony_ci if (status != -ENOSPC) 6738c2ecf20Sopenharmony_ci mlog_errno(status); 6748c2ecf20Sopenharmony_ci goto bail; 6758c2ecf20Sopenharmony_ci } 6768c2ecf20Sopenharmony_ci 6778c2ecf20Sopenharmony_ci /* 6788c2ecf20Sopenharmony_ci * Under certain conditions, the window slide code 6798c2ecf20Sopenharmony_ci * might have reduced the number of bits available or 6808c2ecf20Sopenharmony_ci * disabled the local alloc entirely. Re-check 6818c2ecf20Sopenharmony_ci * here and return -ENOSPC if necessary. 6828c2ecf20Sopenharmony_ci */ 6838c2ecf20Sopenharmony_ci status = -ENOSPC; 6848c2ecf20Sopenharmony_ci if (!ocfs2_la_state_enabled(osb)) 6858c2ecf20Sopenharmony_ci goto bail; 6868c2ecf20Sopenharmony_ci 6878c2ecf20Sopenharmony_ci free_bits = le32_to_cpu(alloc->id1.bitmap1.i_total) - 6888c2ecf20Sopenharmony_ci le32_to_cpu(alloc->id1.bitmap1.i_used); 6898c2ecf20Sopenharmony_ci if (bits_wanted > free_bits) 6908c2ecf20Sopenharmony_ci goto bail; 6918c2ecf20Sopenharmony_ci } 6928c2ecf20Sopenharmony_ci 6938c2ecf20Sopenharmony_ci ac->ac_inode = local_alloc_inode; 6948c2ecf20Sopenharmony_ci /* We should never use localalloc from another slot */ 6958c2ecf20Sopenharmony_ci ac->ac_alloc_slot = osb->slot_num; 6968c2ecf20Sopenharmony_ci ac->ac_which = OCFS2_AC_USE_LOCAL; 6978c2ecf20Sopenharmony_ci get_bh(osb->local_alloc_bh); 6988c2ecf20Sopenharmony_ci ac->ac_bh = osb->local_alloc_bh; 6998c2ecf20Sopenharmony_ci status = 0; 7008c2ecf20Sopenharmony_cibail: 7018c2ecf20Sopenharmony_ci if (status < 0 && local_alloc_inode) { 7028c2ecf20Sopenharmony_ci inode_unlock(local_alloc_inode); 7038c2ecf20Sopenharmony_ci iput(local_alloc_inode); 7048c2ecf20Sopenharmony_ci } 7058c2ecf20Sopenharmony_ci 7068c2ecf20Sopenharmony_ci trace_ocfs2_reserve_local_alloc_bits( 7078c2ecf20Sopenharmony_ci (unsigned long 
long)ac->ac_max_block, 7088c2ecf20Sopenharmony_ci bits_wanted, osb->slot_num, status); 7098c2ecf20Sopenharmony_ci 7108c2ecf20Sopenharmony_ci if (status) 7118c2ecf20Sopenharmony_ci mlog_errno(status); 7128c2ecf20Sopenharmony_ci return status; 7138c2ecf20Sopenharmony_ci} 7148c2ecf20Sopenharmony_ci 7158c2ecf20Sopenharmony_ciint ocfs2_claim_local_alloc_bits(struct ocfs2_super *osb, 7168c2ecf20Sopenharmony_ci handle_t *handle, 7178c2ecf20Sopenharmony_ci struct ocfs2_alloc_context *ac, 7188c2ecf20Sopenharmony_ci u32 bits_wanted, 7198c2ecf20Sopenharmony_ci u32 *bit_off, 7208c2ecf20Sopenharmony_ci u32 *num_bits) 7218c2ecf20Sopenharmony_ci{ 7228c2ecf20Sopenharmony_ci int status, start; 7238c2ecf20Sopenharmony_ci struct inode *local_alloc_inode; 7248c2ecf20Sopenharmony_ci void *bitmap; 7258c2ecf20Sopenharmony_ci struct ocfs2_dinode *alloc; 7268c2ecf20Sopenharmony_ci struct ocfs2_local_alloc *la; 7278c2ecf20Sopenharmony_ci 7288c2ecf20Sopenharmony_ci BUG_ON(ac->ac_which != OCFS2_AC_USE_LOCAL); 7298c2ecf20Sopenharmony_ci 7308c2ecf20Sopenharmony_ci local_alloc_inode = ac->ac_inode; 7318c2ecf20Sopenharmony_ci alloc = (struct ocfs2_dinode *) osb->local_alloc_bh->b_data; 7328c2ecf20Sopenharmony_ci la = OCFS2_LOCAL_ALLOC(alloc); 7338c2ecf20Sopenharmony_ci 7348c2ecf20Sopenharmony_ci start = ocfs2_local_alloc_find_clear_bits(osb, alloc, &bits_wanted, 7358c2ecf20Sopenharmony_ci ac->ac_resv); 7368c2ecf20Sopenharmony_ci if (start == -1) { 7378c2ecf20Sopenharmony_ci /* TODO: Shouldn't we just BUG here? 
*/ 7388c2ecf20Sopenharmony_ci status = -ENOSPC; 7398c2ecf20Sopenharmony_ci mlog_errno(status); 7408c2ecf20Sopenharmony_ci goto bail; 7418c2ecf20Sopenharmony_ci } 7428c2ecf20Sopenharmony_ci 7438c2ecf20Sopenharmony_ci bitmap = la->la_bitmap; 7448c2ecf20Sopenharmony_ci *bit_off = le32_to_cpu(la->la_bm_off) + start; 7458c2ecf20Sopenharmony_ci *num_bits = bits_wanted; 7468c2ecf20Sopenharmony_ci 7478c2ecf20Sopenharmony_ci status = ocfs2_journal_access_di(handle, 7488c2ecf20Sopenharmony_ci INODE_CACHE(local_alloc_inode), 7498c2ecf20Sopenharmony_ci osb->local_alloc_bh, 7508c2ecf20Sopenharmony_ci OCFS2_JOURNAL_ACCESS_WRITE); 7518c2ecf20Sopenharmony_ci if (status < 0) { 7528c2ecf20Sopenharmony_ci mlog_errno(status); 7538c2ecf20Sopenharmony_ci goto bail; 7548c2ecf20Sopenharmony_ci } 7558c2ecf20Sopenharmony_ci 7568c2ecf20Sopenharmony_ci ocfs2_resmap_claimed_bits(&osb->osb_la_resmap, ac->ac_resv, start, 7578c2ecf20Sopenharmony_ci bits_wanted); 7588c2ecf20Sopenharmony_ci 7598c2ecf20Sopenharmony_ci while(bits_wanted--) 7608c2ecf20Sopenharmony_ci ocfs2_set_bit(start++, bitmap); 7618c2ecf20Sopenharmony_ci 7628c2ecf20Sopenharmony_ci le32_add_cpu(&alloc->id1.bitmap1.i_used, *num_bits); 7638c2ecf20Sopenharmony_ci ocfs2_journal_dirty(handle, osb->local_alloc_bh); 7648c2ecf20Sopenharmony_ci 7658c2ecf20Sopenharmony_cibail: 7668c2ecf20Sopenharmony_ci if (status) 7678c2ecf20Sopenharmony_ci mlog_errno(status); 7688c2ecf20Sopenharmony_ci return status; 7698c2ecf20Sopenharmony_ci} 7708c2ecf20Sopenharmony_ci 7718c2ecf20Sopenharmony_ciint ocfs2_free_local_alloc_bits(struct ocfs2_super *osb, 7728c2ecf20Sopenharmony_ci handle_t *handle, 7738c2ecf20Sopenharmony_ci struct ocfs2_alloc_context *ac, 7748c2ecf20Sopenharmony_ci u32 bit_off, 7758c2ecf20Sopenharmony_ci u32 num_bits) 7768c2ecf20Sopenharmony_ci{ 7778c2ecf20Sopenharmony_ci int status, start; 7788c2ecf20Sopenharmony_ci u32 clear_bits; 7798c2ecf20Sopenharmony_ci struct inode *local_alloc_inode; 7808c2ecf20Sopenharmony_ci void *bitmap; 
7818c2ecf20Sopenharmony_ci struct ocfs2_dinode *alloc; 7828c2ecf20Sopenharmony_ci struct ocfs2_local_alloc *la; 7838c2ecf20Sopenharmony_ci 7848c2ecf20Sopenharmony_ci BUG_ON(ac->ac_which != OCFS2_AC_USE_LOCAL); 7858c2ecf20Sopenharmony_ci 7868c2ecf20Sopenharmony_ci local_alloc_inode = ac->ac_inode; 7878c2ecf20Sopenharmony_ci alloc = (struct ocfs2_dinode *) osb->local_alloc_bh->b_data; 7888c2ecf20Sopenharmony_ci la = OCFS2_LOCAL_ALLOC(alloc); 7898c2ecf20Sopenharmony_ci 7908c2ecf20Sopenharmony_ci bitmap = la->la_bitmap; 7918c2ecf20Sopenharmony_ci start = bit_off - le32_to_cpu(la->la_bm_off); 7928c2ecf20Sopenharmony_ci clear_bits = num_bits; 7938c2ecf20Sopenharmony_ci 7948c2ecf20Sopenharmony_ci status = ocfs2_journal_access_di(handle, 7958c2ecf20Sopenharmony_ci INODE_CACHE(local_alloc_inode), 7968c2ecf20Sopenharmony_ci osb->local_alloc_bh, 7978c2ecf20Sopenharmony_ci OCFS2_JOURNAL_ACCESS_WRITE); 7988c2ecf20Sopenharmony_ci if (status < 0) { 7998c2ecf20Sopenharmony_ci mlog_errno(status); 8008c2ecf20Sopenharmony_ci goto bail; 8018c2ecf20Sopenharmony_ci } 8028c2ecf20Sopenharmony_ci 8038c2ecf20Sopenharmony_ci while (clear_bits--) 8048c2ecf20Sopenharmony_ci ocfs2_clear_bit(start++, bitmap); 8058c2ecf20Sopenharmony_ci 8068c2ecf20Sopenharmony_ci le32_add_cpu(&alloc->id1.bitmap1.i_used, -num_bits); 8078c2ecf20Sopenharmony_ci ocfs2_journal_dirty(handle, osb->local_alloc_bh); 8088c2ecf20Sopenharmony_ci 8098c2ecf20Sopenharmony_cibail: 8108c2ecf20Sopenharmony_ci return status; 8118c2ecf20Sopenharmony_ci} 8128c2ecf20Sopenharmony_ci 8138c2ecf20Sopenharmony_cistatic u32 ocfs2_local_alloc_count_bits(struct ocfs2_dinode *alloc) 8148c2ecf20Sopenharmony_ci{ 8158c2ecf20Sopenharmony_ci u32 count; 8168c2ecf20Sopenharmony_ci struct ocfs2_local_alloc *la = OCFS2_LOCAL_ALLOC(alloc); 8178c2ecf20Sopenharmony_ci 8188c2ecf20Sopenharmony_ci count = memweight(la->la_bitmap, le16_to_cpu(la->la_size)); 8198c2ecf20Sopenharmony_ci 8208c2ecf20Sopenharmony_ci trace_ocfs2_local_alloc_count_bits(count); 
8218c2ecf20Sopenharmony_ci return count; 8228c2ecf20Sopenharmony_ci} 8238c2ecf20Sopenharmony_ci 8248c2ecf20Sopenharmony_cistatic int ocfs2_local_alloc_find_clear_bits(struct ocfs2_super *osb, 8258c2ecf20Sopenharmony_ci struct ocfs2_dinode *alloc, 8268c2ecf20Sopenharmony_ci u32 *numbits, 8278c2ecf20Sopenharmony_ci struct ocfs2_alloc_reservation *resv) 8288c2ecf20Sopenharmony_ci{ 8298c2ecf20Sopenharmony_ci int numfound = 0, bitoff, left, startoff; 8308c2ecf20Sopenharmony_ci int local_resv = 0; 8318c2ecf20Sopenharmony_ci struct ocfs2_alloc_reservation r; 8328c2ecf20Sopenharmony_ci void *bitmap = NULL; 8338c2ecf20Sopenharmony_ci struct ocfs2_reservation_map *resmap = &osb->osb_la_resmap; 8348c2ecf20Sopenharmony_ci 8358c2ecf20Sopenharmony_ci if (!alloc->id1.bitmap1.i_total) { 8368c2ecf20Sopenharmony_ci bitoff = -1; 8378c2ecf20Sopenharmony_ci goto bail; 8388c2ecf20Sopenharmony_ci } 8398c2ecf20Sopenharmony_ci 8408c2ecf20Sopenharmony_ci if (!resv) { 8418c2ecf20Sopenharmony_ci local_resv = 1; 8428c2ecf20Sopenharmony_ci ocfs2_resv_init_once(&r); 8438c2ecf20Sopenharmony_ci ocfs2_resv_set_type(&r, OCFS2_RESV_FLAG_TMP); 8448c2ecf20Sopenharmony_ci resv = &r; 8458c2ecf20Sopenharmony_ci } 8468c2ecf20Sopenharmony_ci 8478c2ecf20Sopenharmony_ci numfound = *numbits; 8488c2ecf20Sopenharmony_ci if (ocfs2_resmap_resv_bits(resmap, resv, &bitoff, &numfound) == 0) { 8498c2ecf20Sopenharmony_ci if (numfound < *numbits) 8508c2ecf20Sopenharmony_ci *numbits = numfound; 8518c2ecf20Sopenharmony_ci goto bail; 8528c2ecf20Sopenharmony_ci } 8538c2ecf20Sopenharmony_ci 8548c2ecf20Sopenharmony_ci /* 8558c2ecf20Sopenharmony_ci * Code error. While reservations are enabled, local 8568c2ecf20Sopenharmony_ci * allocation should _always_ go through them. 8578c2ecf20Sopenharmony_ci */ 8588c2ecf20Sopenharmony_ci BUG_ON(osb->osb_resv_level != 0); 8598c2ecf20Sopenharmony_ci 8608c2ecf20Sopenharmony_ci /* 8618c2ecf20Sopenharmony_ci * Reservations are disabled. Handle this the old way. 
8628c2ecf20Sopenharmony_ci */ 8638c2ecf20Sopenharmony_ci 8648c2ecf20Sopenharmony_ci bitmap = OCFS2_LOCAL_ALLOC(alloc)->la_bitmap; 8658c2ecf20Sopenharmony_ci 8668c2ecf20Sopenharmony_ci numfound = bitoff = startoff = 0; 8678c2ecf20Sopenharmony_ci left = le32_to_cpu(alloc->id1.bitmap1.i_total); 8688c2ecf20Sopenharmony_ci while ((bitoff = ocfs2_find_next_zero_bit(bitmap, left, startoff)) != -1) { 8698c2ecf20Sopenharmony_ci if (bitoff == left) { 8708c2ecf20Sopenharmony_ci /* mlog(0, "bitoff (%d) == left", bitoff); */ 8718c2ecf20Sopenharmony_ci break; 8728c2ecf20Sopenharmony_ci } 8738c2ecf20Sopenharmony_ci /* mlog(0, "Found a zero: bitoff = %d, startoff = %d, " 8748c2ecf20Sopenharmony_ci "numfound = %d\n", bitoff, startoff, numfound);*/ 8758c2ecf20Sopenharmony_ci 8768c2ecf20Sopenharmony_ci /* Ok, we found a zero bit... is it contig. or do we 8778c2ecf20Sopenharmony_ci * start over?*/ 8788c2ecf20Sopenharmony_ci if (bitoff == startoff) { 8798c2ecf20Sopenharmony_ci /* we found a zero */ 8808c2ecf20Sopenharmony_ci numfound++; 8818c2ecf20Sopenharmony_ci startoff++; 8828c2ecf20Sopenharmony_ci } else { 8838c2ecf20Sopenharmony_ci /* got a zero after some ones */ 8848c2ecf20Sopenharmony_ci numfound = 1; 8858c2ecf20Sopenharmony_ci startoff = bitoff+1; 8868c2ecf20Sopenharmony_ci } 8878c2ecf20Sopenharmony_ci /* we got everything we needed */ 8888c2ecf20Sopenharmony_ci if (numfound == *numbits) { 8898c2ecf20Sopenharmony_ci /* mlog(0, "Found it all!\n"); */ 8908c2ecf20Sopenharmony_ci break; 8918c2ecf20Sopenharmony_ci } 8928c2ecf20Sopenharmony_ci } 8938c2ecf20Sopenharmony_ci 8948c2ecf20Sopenharmony_ci trace_ocfs2_local_alloc_find_clear_bits_search_bitmap(bitoff, numfound); 8958c2ecf20Sopenharmony_ci 8968c2ecf20Sopenharmony_ci if (numfound == *numbits) 8978c2ecf20Sopenharmony_ci bitoff = startoff - numfound; 8988c2ecf20Sopenharmony_ci else 8998c2ecf20Sopenharmony_ci bitoff = -1; 9008c2ecf20Sopenharmony_ci 9018c2ecf20Sopenharmony_cibail: 9028c2ecf20Sopenharmony_ci if (local_resv) 
9038c2ecf20Sopenharmony_ci ocfs2_resv_discard(resmap, resv); 9048c2ecf20Sopenharmony_ci 9058c2ecf20Sopenharmony_ci trace_ocfs2_local_alloc_find_clear_bits(*numbits, 9068c2ecf20Sopenharmony_ci le32_to_cpu(alloc->id1.bitmap1.i_total), 9078c2ecf20Sopenharmony_ci bitoff, numfound); 9088c2ecf20Sopenharmony_ci 9098c2ecf20Sopenharmony_ci return bitoff; 9108c2ecf20Sopenharmony_ci} 9118c2ecf20Sopenharmony_ci 9128c2ecf20Sopenharmony_cistatic void ocfs2_clear_local_alloc(struct ocfs2_dinode *alloc) 9138c2ecf20Sopenharmony_ci{ 9148c2ecf20Sopenharmony_ci struct ocfs2_local_alloc *la = OCFS2_LOCAL_ALLOC(alloc); 9158c2ecf20Sopenharmony_ci int i; 9168c2ecf20Sopenharmony_ci 9178c2ecf20Sopenharmony_ci alloc->id1.bitmap1.i_total = 0; 9188c2ecf20Sopenharmony_ci alloc->id1.bitmap1.i_used = 0; 9198c2ecf20Sopenharmony_ci la->la_bm_off = 0; 9208c2ecf20Sopenharmony_ci for(i = 0; i < le16_to_cpu(la->la_size); i++) 9218c2ecf20Sopenharmony_ci la->la_bitmap[i] = 0; 9228c2ecf20Sopenharmony_ci} 9238c2ecf20Sopenharmony_ci 9248c2ecf20Sopenharmony_ci#if 0 9258c2ecf20Sopenharmony_ci/* turn this on and uncomment below to aid debugging window shifts. 
*/ 9268c2ecf20Sopenharmony_cistatic void ocfs2_verify_zero_bits(unsigned long *bitmap, 9278c2ecf20Sopenharmony_ci unsigned int start, 9288c2ecf20Sopenharmony_ci unsigned int count) 9298c2ecf20Sopenharmony_ci{ 9308c2ecf20Sopenharmony_ci unsigned int tmp = count; 9318c2ecf20Sopenharmony_ci while(tmp--) { 9328c2ecf20Sopenharmony_ci if (ocfs2_test_bit(start + tmp, bitmap)) { 9338c2ecf20Sopenharmony_ci printk("ocfs2_verify_zero_bits: start = %u, count = " 9348c2ecf20Sopenharmony_ci "%u\n", start, count); 9358c2ecf20Sopenharmony_ci printk("ocfs2_verify_zero_bits: bit %u is set!", 9368c2ecf20Sopenharmony_ci start + tmp); 9378c2ecf20Sopenharmony_ci BUG(); 9388c2ecf20Sopenharmony_ci } 9398c2ecf20Sopenharmony_ci } 9408c2ecf20Sopenharmony_ci} 9418c2ecf20Sopenharmony_ci#endif 9428c2ecf20Sopenharmony_ci 9438c2ecf20Sopenharmony_ci/* 9448c2ecf20Sopenharmony_ci * sync the local alloc to main bitmap. 9458c2ecf20Sopenharmony_ci * 9468c2ecf20Sopenharmony_ci * assumes you've already locked the main bitmap -- the bitmap inode 9478c2ecf20Sopenharmony_ci * passed is used for caching. 
9488c2ecf20Sopenharmony_ci */ 9498c2ecf20Sopenharmony_cistatic int ocfs2_sync_local_to_main(struct ocfs2_super *osb, 9508c2ecf20Sopenharmony_ci handle_t *handle, 9518c2ecf20Sopenharmony_ci struct ocfs2_dinode *alloc, 9528c2ecf20Sopenharmony_ci struct inode *main_bm_inode, 9538c2ecf20Sopenharmony_ci struct buffer_head *main_bm_bh) 9548c2ecf20Sopenharmony_ci{ 9558c2ecf20Sopenharmony_ci int status = 0; 9568c2ecf20Sopenharmony_ci int bit_off, left, count, start; 9578c2ecf20Sopenharmony_ci u64 la_start_blk; 9588c2ecf20Sopenharmony_ci u64 blkno; 9598c2ecf20Sopenharmony_ci void *bitmap; 9608c2ecf20Sopenharmony_ci struct ocfs2_local_alloc *la = OCFS2_LOCAL_ALLOC(alloc); 9618c2ecf20Sopenharmony_ci 9628c2ecf20Sopenharmony_ci trace_ocfs2_sync_local_to_main( 9638c2ecf20Sopenharmony_ci le32_to_cpu(alloc->id1.bitmap1.i_total), 9648c2ecf20Sopenharmony_ci le32_to_cpu(alloc->id1.bitmap1.i_used)); 9658c2ecf20Sopenharmony_ci 9668c2ecf20Sopenharmony_ci if (!alloc->id1.bitmap1.i_total) { 9678c2ecf20Sopenharmony_ci goto bail; 9688c2ecf20Sopenharmony_ci } 9698c2ecf20Sopenharmony_ci 9708c2ecf20Sopenharmony_ci if (le32_to_cpu(alloc->id1.bitmap1.i_used) == 9718c2ecf20Sopenharmony_ci le32_to_cpu(alloc->id1.bitmap1.i_total)) { 9728c2ecf20Sopenharmony_ci goto bail; 9738c2ecf20Sopenharmony_ci } 9748c2ecf20Sopenharmony_ci 9758c2ecf20Sopenharmony_ci la_start_blk = ocfs2_clusters_to_blocks(osb->sb, 9768c2ecf20Sopenharmony_ci le32_to_cpu(la->la_bm_off)); 9778c2ecf20Sopenharmony_ci bitmap = la->la_bitmap; 9788c2ecf20Sopenharmony_ci start = count = bit_off = 0; 9798c2ecf20Sopenharmony_ci left = le32_to_cpu(alloc->id1.bitmap1.i_total); 9808c2ecf20Sopenharmony_ci 9818c2ecf20Sopenharmony_ci while ((bit_off = ocfs2_find_next_zero_bit(bitmap, left, start)) 9828c2ecf20Sopenharmony_ci != -1) { 9838c2ecf20Sopenharmony_ci if ((bit_off < left) && (bit_off == start)) { 9848c2ecf20Sopenharmony_ci count++; 9858c2ecf20Sopenharmony_ci start++; 9868c2ecf20Sopenharmony_ci continue; 9878c2ecf20Sopenharmony_ci } 
9888c2ecf20Sopenharmony_ci if (count) { 9898c2ecf20Sopenharmony_ci blkno = la_start_blk + 9908c2ecf20Sopenharmony_ci ocfs2_clusters_to_blocks(osb->sb, 9918c2ecf20Sopenharmony_ci start - count); 9928c2ecf20Sopenharmony_ci 9938c2ecf20Sopenharmony_ci trace_ocfs2_sync_local_to_main_free( 9948c2ecf20Sopenharmony_ci count, start - count, 9958c2ecf20Sopenharmony_ci (unsigned long long)la_start_blk, 9968c2ecf20Sopenharmony_ci (unsigned long long)blkno); 9978c2ecf20Sopenharmony_ci 9988c2ecf20Sopenharmony_ci status = ocfs2_release_clusters(handle, 9998c2ecf20Sopenharmony_ci main_bm_inode, 10008c2ecf20Sopenharmony_ci main_bm_bh, blkno, 10018c2ecf20Sopenharmony_ci count); 10028c2ecf20Sopenharmony_ci if (status < 0) { 10038c2ecf20Sopenharmony_ci mlog_errno(status); 10048c2ecf20Sopenharmony_ci goto bail; 10058c2ecf20Sopenharmony_ci } 10068c2ecf20Sopenharmony_ci } 10078c2ecf20Sopenharmony_ci if (bit_off >= left) 10088c2ecf20Sopenharmony_ci break; 10098c2ecf20Sopenharmony_ci count = 1; 10108c2ecf20Sopenharmony_ci start = bit_off + 1; 10118c2ecf20Sopenharmony_ci } 10128c2ecf20Sopenharmony_ci 10138c2ecf20Sopenharmony_cibail: 10148c2ecf20Sopenharmony_ci if (status) 10158c2ecf20Sopenharmony_ci mlog_errno(status); 10168c2ecf20Sopenharmony_ci return status; 10178c2ecf20Sopenharmony_ci} 10188c2ecf20Sopenharmony_ci 10198c2ecf20Sopenharmony_cienum ocfs2_la_event { 10208c2ecf20Sopenharmony_ci OCFS2_LA_EVENT_SLIDE, /* Normal window slide. */ 10218c2ecf20Sopenharmony_ci OCFS2_LA_EVENT_FRAGMENTED, /* The global bitmap has 10228c2ecf20Sopenharmony_ci * enough bits theoretically 10238c2ecf20Sopenharmony_ci * free, but a contiguous 10248c2ecf20Sopenharmony_ci * allocation could not be 10258c2ecf20Sopenharmony_ci * found. */ 10268c2ecf20Sopenharmony_ci OCFS2_LA_EVENT_ENOSPC, /* Global bitmap doesn't have 10278c2ecf20Sopenharmony_ci * enough bits free to satisfy 10288c2ecf20Sopenharmony_ci * our request. 
*/ 10298c2ecf20Sopenharmony_ci}; 10308c2ecf20Sopenharmony_ci#define OCFS2_LA_ENABLE_INTERVAL (30 * HZ) 10318c2ecf20Sopenharmony_ci/* 10328c2ecf20Sopenharmony_ci * Given an event, calculate the size of our next local alloc window. 10338c2ecf20Sopenharmony_ci * 10348c2ecf20Sopenharmony_ci * This should always be called under i_mutex of the local alloc inode 10358c2ecf20Sopenharmony_ci * so that local alloc disabling doesn't race with processes trying to 10368c2ecf20Sopenharmony_ci * use the allocator. 10378c2ecf20Sopenharmony_ci * 10388c2ecf20Sopenharmony_ci * Returns the state which the local alloc was left in. This value can 10398c2ecf20Sopenharmony_ci * be ignored by some paths. 10408c2ecf20Sopenharmony_ci */ 10418c2ecf20Sopenharmony_cistatic int ocfs2_recalc_la_window(struct ocfs2_super *osb, 10428c2ecf20Sopenharmony_ci enum ocfs2_la_event event) 10438c2ecf20Sopenharmony_ci{ 10448c2ecf20Sopenharmony_ci unsigned int bits; 10458c2ecf20Sopenharmony_ci int state; 10468c2ecf20Sopenharmony_ci 10478c2ecf20Sopenharmony_ci spin_lock(&osb->osb_lock); 10488c2ecf20Sopenharmony_ci if (osb->local_alloc_state == OCFS2_LA_DISABLED) { 10498c2ecf20Sopenharmony_ci WARN_ON_ONCE(osb->local_alloc_state == OCFS2_LA_DISABLED); 10508c2ecf20Sopenharmony_ci goto out_unlock; 10518c2ecf20Sopenharmony_ci } 10528c2ecf20Sopenharmony_ci 10538c2ecf20Sopenharmony_ci /* 10548c2ecf20Sopenharmony_ci * ENOSPC and fragmentation are treated similarly for now. 10558c2ecf20Sopenharmony_ci */ 10568c2ecf20Sopenharmony_ci if (event == OCFS2_LA_EVENT_ENOSPC || 10578c2ecf20Sopenharmony_ci event == OCFS2_LA_EVENT_FRAGMENTED) { 10588c2ecf20Sopenharmony_ci /* 10598c2ecf20Sopenharmony_ci * We ran out of contiguous space in the primary 10608c2ecf20Sopenharmony_ci * bitmap. Drastically reduce the number of bits used 10618c2ecf20Sopenharmony_ci * by local alloc until we have to disable it. 
10628c2ecf20Sopenharmony_ci */ 10638c2ecf20Sopenharmony_ci bits = osb->local_alloc_bits >> 1; 10648c2ecf20Sopenharmony_ci if (bits > ocfs2_megabytes_to_clusters(osb->sb, 1)) { 10658c2ecf20Sopenharmony_ci /* 10668c2ecf20Sopenharmony_ci * By setting state to THROTTLED, we'll keep 10678c2ecf20Sopenharmony_ci * the number of local alloc bits used down 10688c2ecf20Sopenharmony_ci * until an event occurs which would give us 10698c2ecf20Sopenharmony_ci * reason to assume the bitmap situation might 10708c2ecf20Sopenharmony_ci * have changed. 10718c2ecf20Sopenharmony_ci */ 10728c2ecf20Sopenharmony_ci osb->local_alloc_state = OCFS2_LA_THROTTLED; 10738c2ecf20Sopenharmony_ci osb->local_alloc_bits = bits; 10748c2ecf20Sopenharmony_ci } else { 10758c2ecf20Sopenharmony_ci osb->local_alloc_state = OCFS2_LA_DISABLED; 10768c2ecf20Sopenharmony_ci } 10778c2ecf20Sopenharmony_ci queue_delayed_work(osb->ocfs2_wq, &osb->la_enable_wq, 10788c2ecf20Sopenharmony_ci OCFS2_LA_ENABLE_INTERVAL); 10798c2ecf20Sopenharmony_ci goto out_unlock; 10808c2ecf20Sopenharmony_ci } 10818c2ecf20Sopenharmony_ci 10828c2ecf20Sopenharmony_ci /* 10838c2ecf20Sopenharmony_ci * Don't increase the size of the local alloc window until we 10848c2ecf20Sopenharmony_ci * know we might be able to fulfill the request. Otherwise, we 10858c2ecf20Sopenharmony_ci * risk bouncing around the global bitmap during periods of 10868c2ecf20Sopenharmony_ci * low space. 
10878c2ecf20Sopenharmony_ci */ 10888c2ecf20Sopenharmony_ci if (osb->local_alloc_state != OCFS2_LA_THROTTLED) 10898c2ecf20Sopenharmony_ci osb->local_alloc_bits = osb->local_alloc_default_bits; 10908c2ecf20Sopenharmony_ci 10918c2ecf20Sopenharmony_ciout_unlock: 10928c2ecf20Sopenharmony_ci state = osb->local_alloc_state; 10938c2ecf20Sopenharmony_ci spin_unlock(&osb->osb_lock); 10948c2ecf20Sopenharmony_ci 10958c2ecf20Sopenharmony_ci return state; 10968c2ecf20Sopenharmony_ci} 10978c2ecf20Sopenharmony_ci 10988c2ecf20Sopenharmony_cistatic int ocfs2_local_alloc_reserve_for_window(struct ocfs2_super *osb, 10998c2ecf20Sopenharmony_ci struct ocfs2_alloc_context **ac, 11008c2ecf20Sopenharmony_ci struct inode **bitmap_inode, 11018c2ecf20Sopenharmony_ci struct buffer_head **bitmap_bh) 11028c2ecf20Sopenharmony_ci{ 11038c2ecf20Sopenharmony_ci int status; 11048c2ecf20Sopenharmony_ci 11058c2ecf20Sopenharmony_ci *ac = kzalloc(sizeof(struct ocfs2_alloc_context), GFP_KERNEL); 11068c2ecf20Sopenharmony_ci if (!(*ac)) { 11078c2ecf20Sopenharmony_ci status = -ENOMEM; 11088c2ecf20Sopenharmony_ci mlog_errno(status); 11098c2ecf20Sopenharmony_ci goto bail; 11108c2ecf20Sopenharmony_ci } 11118c2ecf20Sopenharmony_ci 11128c2ecf20Sopenharmony_ciretry_enospc: 11138c2ecf20Sopenharmony_ci (*ac)->ac_bits_wanted = osb->local_alloc_bits; 11148c2ecf20Sopenharmony_ci status = ocfs2_reserve_cluster_bitmap_bits(osb, *ac); 11158c2ecf20Sopenharmony_ci if (status == -ENOSPC) { 11168c2ecf20Sopenharmony_ci if (ocfs2_recalc_la_window(osb, OCFS2_LA_EVENT_ENOSPC) == 11178c2ecf20Sopenharmony_ci OCFS2_LA_DISABLED) 11188c2ecf20Sopenharmony_ci goto bail; 11198c2ecf20Sopenharmony_ci 11208c2ecf20Sopenharmony_ci ocfs2_free_ac_resource(*ac); 11218c2ecf20Sopenharmony_ci memset(*ac, 0, sizeof(struct ocfs2_alloc_context)); 11228c2ecf20Sopenharmony_ci goto retry_enospc; 11238c2ecf20Sopenharmony_ci } 11248c2ecf20Sopenharmony_ci if (status < 0) { 11258c2ecf20Sopenharmony_ci mlog_errno(status); 11268c2ecf20Sopenharmony_ci goto bail; 
11278c2ecf20Sopenharmony_ci } 11288c2ecf20Sopenharmony_ci 11298c2ecf20Sopenharmony_ci *bitmap_inode = (*ac)->ac_inode; 11308c2ecf20Sopenharmony_ci igrab(*bitmap_inode); 11318c2ecf20Sopenharmony_ci *bitmap_bh = (*ac)->ac_bh; 11328c2ecf20Sopenharmony_ci get_bh(*bitmap_bh); 11338c2ecf20Sopenharmony_ci status = 0; 11348c2ecf20Sopenharmony_cibail: 11358c2ecf20Sopenharmony_ci if ((status < 0) && *ac) { 11368c2ecf20Sopenharmony_ci ocfs2_free_alloc_context(*ac); 11378c2ecf20Sopenharmony_ci *ac = NULL; 11388c2ecf20Sopenharmony_ci } 11398c2ecf20Sopenharmony_ci 11408c2ecf20Sopenharmony_ci if (status) 11418c2ecf20Sopenharmony_ci mlog_errno(status); 11428c2ecf20Sopenharmony_ci return status; 11438c2ecf20Sopenharmony_ci} 11448c2ecf20Sopenharmony_ci 11458c2ecf20Sopenharmony_ci/* 11468c2ecf20Sopenharmony_ci * pass it the bitmap lock in lock_bh if you have it. 11478c2ecf20Sopenharmony_ci */ 11488c2ecf20Sopenharmony_cistatic int ocfs2_local_alloc_new_window(struct ocfs2_super *osb, 11498c2ecf20Sopenharmony_ci handle_t *handle, 11508c2ecf20Sopenharmony_ci struct ocfs2_alloc_context *ac) 11518c2ecf20Sopenharmony_ci{ 11528c2ecf20Sopenharmony_ci int status = 0; 11538c2ecf20Sopenharmony_ci u32 cluster_off, cluster_count; 11548c2ecf20Sopenharmony_ci struct ocfs2_dinode *alloc = NULL; 11558c2ecf20Sopenharmony_ci struct ocfs2_local_alloc *la; 11568c2ecf20Sopenharmony_ci 11578c2ecf20Sopenharmony_ci alloc = (struct ocfs2_dinode *) osb->local_alloc_bh->b_data; 11588c2ecf20Sopenharmony_ci la = OCFS2_LOCAL_ALLOC(alloc); 11598c2ecf20Sopenharmony_ci 11608c2ecf20Sopenharmony_ci trace_ocfs2_local_alloc_new_window( 11618c2ecf20Sopenharmony_ci le32_to_cpu(alloc->id1.bitmap1.i_total), 11628c2ecf20Sopenharmony_ci osb->local_alloc_bits); 11638c2ecf20Sopenharmony_ci 11648c2ecf20Sopenharmony_ci /* Instruct the allocation code to try the most recently used 11658c2ecf20Sopenharmony_ci * cluster group. We'll re-record the group used this pass 11668c2ecf20Sopenharmony_ci * below. 
*/ 11678c2ecf20Sopenharmony_ci ac->ac_last_group = osb->la_last_gd; 11688c2ecf20Sopenharmony_ci 11698c2ecf20Sopenharmony_ci /* we used the generic suballoc reserve function, but we set 11708c2ecf20Sopenharmony_ci * everything up nicely, so there's no reason why we can't use 11718c2ecf20Sopenharmony_ci * the more specific cluster api to claim bits. */ 11728c2ecf20Sopenharmony_ci status = ocfs2_claim_clusters(handle, ac, osb->local_alloc_bits, 11738c2ecf20Sopenharmony_ci &cluster_off, &cluster_count); 11748c2ecf20Sopenharmony_ci if (status == -ENOSPC) { 11758c2ecf20Sopenharmony_ciretry_enospc: 11768c2ecf20Sopenharmony_ci /* 11778c2ecf20Sopenharmony_ci * Note: We could also try syncing the journal here to 11788c2ecf20Sopenharmony_ci * allow use of any free bits which the current 11798c2ecf20Sopenharmony_ci * transaction can't give us access to. --Mark 11808c2ecf20Sopenharmony_ci */ 11818c2ecf20Sopenharmony_ci if (ocfs2_recalc_la_window(osb, OCFS2_LA_EVENT_FRAGMENTED) == 11828c2ecf20Sopenharmony_ci OCFS2_LA_DISABLED) 11838c2ecf20Sopenharmony_ci goto bail; 11848c2ecf20Sopenharmony_ci 11858c2ecf20Sopenharmony_ci ac->ac_bits_wanted = osb->local_alloc_bits; 11868c2ecf20Sopenharmony_ci status = ocfs2_claim_clusters(handle, ac, 11878c2ecf20Sopenharmony_ci osb->local_alloc_bits, 11888c2ecf20Sopenharmony_ci &cluster_off, 11898c2ecf20Sopenharmony_ci &cluster_count); 11908c2ecf20Sopenharmony_ci if (status == -ENOSPC) 11918c2ecf20Sopenharmony_ci goto retry_enospc; 11928c2ecf20Sopenharmony_ci /* 11938c2ecf20Sopenharmony_ci * We only shrunk the *minimum* number of in our 11948c2ecf20Sopenharmony_ci * request - it's entirely possible that the allocator 11958c2ecf20Sopenharmony_ci * might give us more than we asked for. 
11968c2ecf20Sopenharmony_ci */ 11978c2ecf20Sopenharmony_ci if (status == 0) { 11988c2ecf20Sopenharmony_ci spin_lock(&osb->osb_lock); 11998c2ecf20Sopenharmony_ci osb->local_alloc_bits = cluster_count; 12008c2ecf20Sopenharmony_ci spin_unlock(&osb->osb_lock); 12018c2ecf20Sopenharmony_ci } 12028c2ecf20Sopenharmony_ci } 12038c2ecf20Sopenharmony_ci if (status < 0) { 12048c2ecf20Sopenharmony_ci if (status != -ENOSPC) 12058c2ecf20Sopenharmony_ci mlog_errno(status); 12068c2ecf20Sopenharmony_ci goto bail; 12078c2ecf20Sopenharmony_ci } 12088c2ecf20Sopenharmony_ci 12098c2ecf20Sopenharmony_ci osb->la_last_gd = ac->ac_last_group; 12108c2ecf20Sopenharmony_ci 12118c2ecf20Sopenharmony_ci la->la_bm_off = cpu_to_le32(cluster_off); 12128c2ecf20Sopenharmony_ci alloc->id1.bitmap1.i_total = cpu_to_le32(cluster_count); 12138c2ecf20Sopenharmony_ci /* just in case... In the future when we find space ourselves, 12148c2ecf20Sopenharmony_ci * we don't have to get all contiguous -- but we'll have to 12158c2ecf20Sopenharmony_ci * set all previously used bits in bitmap and update 12168c2ecf20Sopenharmony_ci * la_bits_set before setting the bits in the main bitmap. 
*/ 12178c2ecf20Sopenharmony_ci alloc->id1.bitmap1.i_used = 0; 12188c2ecf20Sopenharmony_ci memset(OCFS2_LOCAL_ALLOC(alloc)->la_bitmap, 0, 12198c2ecf20Sopenharmony_ci le16_to_cpu(la->la_size)); 12208c2ecf20Sopenharmony_ci 12218c2ecf20Sopenharmony_ci ocfs2_resmap_restart(&osb->osb_la_resmap, cluster_count, 12228c2ecf20Sopenharmony_ci OCFS2_LOCAL_ALLOC(alloc)->la_bitmap); 12238c2ecf20Sopenharmony_ci 12248c2ecf20Sopenharmony_ci trace_ocfs2_local_alloc_new_window_result( 12258c2ecf20Sopenharmony_ci OCFS2_LOCAL_ALLOC(alloc)->la_bm_off, 12268c2ecf20Sopenharmony_ci le32_to_cpu(alloc->id1.bitmap1.i_total)); 12278c2ecf20Sopenharmony_ci 12288c2ecf20Sopenharmony_cibail: 12298c2ecf20Sopenharmony_ci if (status) 12308c2ecf20Sopenharmony_ci mlog_errno(status); 12318c2ecf20Sopenharmony_ci return status; 12328c2ecf20Sopenharmony_ci} 12338c2ecf20Sopenharmony_ci
/*
 * ocfs2_local_alloc_slide_window() - retire the current local alloc
 * window and set up a new one.
 * @osb:               super block info; supplies local_alloc_bh and the
 *                     alloc_stats.moves counter.
 * @local_alloc_inode: local alloc inode, used for journal access to
 *                     osb->local_alloc_bh.
 *
 * Note that we do *NOT* lock the local alloc inode here as it's been
 * locked already for us.
 *
 * Steps, all inside a single transaction started with
 * OCFS2_WINDOW_MOVE_CREDITS:
 *   1. take a private copy of the on-disk local alloc dinode,
 *   2. clear the live local alloc and mark its buffer dirty,
 *   3. hand the copy to ocfs2_sync_local_to_main(),
 *   4. call ocfs2_local_alloc_new_window() with the reservation taken
 *      up front.
 *
 * Return: 0 on success, a negative errno otherwise (-ENOMEM if the copy
 * cannot be allocated, else the status of the helper that failed).
 * -ENOSPC from the reservation or from the new window is passed back
 * to the caller without being logged.
 */
static int ocfs2_local_alloc_slide_window(struct ocfs2_super *osb,
					  struct inode *local_alloc_inode)
{
	int status = 0;
	struct buffer_head *main_bm_bh = NULL;
	struct inode *main_bm_inode = NULL;
	handle_t *handle = NULL;
	struct ocfs2_dinode *alloc;
	struct ocfs2_dinode *alloc_copy = NULL;
	struct ocfs2_alloc_context *ac = NULL;

	/* Report the slide event before reserving anything. */
	ocfs2_recalc_la_window(osb, OCFS2_LA_EVENT_SLIDE);

	/* This will lock the main bitmap for us. */
	status = ocfs2_local_alloc_reserve_for_window(osb,
						      &ac,
						      &main_bm_inode,
						      &main_bm_bh);
	if (status < 0) {
		/* Running out of space is not worth an error message. */
		if (status != -ENOSPC)
			mlog_errno(status);
		goto bail;
	}

	handle = ocfs2_start_trans(osb, OCFS2_WINDOW_MOVE_CREDITS);
	if (IS_ERR(handle)) {
		status = PTR_ERR(handle);
		/* Keep the cleanup code from committing an error pointer. */
		handle = NULL;
		mlog_errno(status);
		goto bail;
	}

	alloc = (struct ocfs2_dinode *) osb->local_alloc_bh->b_data;

	/*
	 * We want to clear the local alloc before doing anything
	 * else, so that if we error later during this operation,
	 * local alloc shutdown won't try to double free main bitmap
	 * bits. Make a copy so the sync function knows which bits to
	 * free.
	 */
	alloc_copy = kmemdup(alloc, osb->local_alloc_bh->b_size, GFP_NOFS);
	if (!alloc_copy) {
		status = -ENOMEM;
		mlog_errno(status);
		goto bail;
	}

	status = ocfs2_journal_access_di(handle,
					 INODE_CACHE(local_alloc_inode),
					 osb->local_alloc_bh,
					 OCFS2_JOURNAL_ACCESS_WRITE);
	if (status < 0) {
		mlog_errno(status);
		goto bail;
	}

	ocfs2_clear_local_alloc(alloc);
	ocfs2_journal_dirty(handle, osb->local_alloc_bh);

	/* Sync from the copy: the live dinode has just been cleared. */
	status = ocfs2_sync_local_to_main(osb, handle, alloc_copy,
					  main_bm_inode, main_bm_bh);
	if (status < 0) {
		mlog_errno(status);
		goto bail;
	}

	status = ocfs2_local_alloc_new_window(osb, handle, ac);
	if (status < 0) {
		if (status != -ENOSPC)
			mlog_errno(status);
		goto bail;
	}

	atomic_inc(&osb->alloc_stats.moves);

bail:
	if (handle)
		ocfs2_commit_trans(osb, handle);

	/*
	 * Called unconditionally: these pointers are still NULL when we
	 * bailed out before they were set.
	 */
	brelse(main_bm_bh);
	iput(main_bm_inode);
	kfree(alloc_copy);

	if (ac)
		ocfs2_free_alloc_context(ac);

	/*
	 * No mlog_errno() here: every failure path above has already
	 * reported its status (or deliberately stayed quiet for -ENOSPC),
	 * so logging again would only duplicate the message.
	 */
	return status;
}
 13268c2ecf20Sopenharmony_ci 1327