// SPDX-License-Identifier: GPL-2.0-only
/*
 * Copyright (C) 2009-2011 Red Hat, Inc.
 *
 * Author: Mikulas Patocka <mpatocka@redhat.com>
 *
 * This file is released under the GPL.
 */

#include <linux/dm-bufio.h>

#include <linux/device-mapper.h>
#include <linux/dm-io.h>
#include <linux/slab.h>
#include <linux/sched/mm.h>
#include <linux/jiffies.h>
#include <linux/vmalloc.h>
#include <linux/shrinker.h>
#include <linux/module.h>
#include <linux/rbtree.h>
#include <linux/stacktrace.h>
#include <linux/jump_label.h>

#include "dm.h"

#define DM_MSG_PREFIX "bufio"

/*
 * Memory management policy:
 *	Limit the number of buffers to DM_BUFIO_MEMORY_PERCENT of main memory
 *	or DM_BUFIO_VMALLOC_PERCENT of vmalloc memory (whichever is lower).
 *	Always allocate at least DM_BUFIO_MIN_BUFFERS buffers.
 *	Start background writeback when the proportion of dirty buffers
 *	exceeds the threshold derived from DM_BUFIO_WRITEBACK_RATIO.
 */
#define DM_BUFIO_MIN_BUFFERS		8

#define DM_BUFIO_MEMORY_PERCENT		2
#define DM_BUFIO_VMALLOC_PERCENT	25
#define DM_BUFIO_WRITEBACK_RATIO	3
#define DM_BUFIO_LOW_WATERMARK_RATIO	16

/*
 * Check buffer ages in this interval (seconds)
 */
#define DM_BUFIO_WORK_TIMER_SECS	30

/*
 * Free buffers when they are older than this (seconds)
 */
#define DM_BUFIO_DEFAULT_AGE_SECS	300

/*
 * The number of bytes of cached data to keep around.
 */
#define DM_BUFIO_DEFAULT_RETAIN_BYTES	(256 * 1024)

/*
 * Align buffer writes to this boundary.
 * Tests show that SSDs have the highest IOPS when using 4k writes.
 */
#define DM_BUFIO_WRITE_ALIGN		4096

/*
 * dm_buffer->list_mode
 */
#define LIST_CLEAN	0
#define LIST_DIRTY	1
#define LIST_SIZE	2

/*--------------------------------------------------------------*/

/*
 * Rather than use an LRU list, we use a clock algorithm where entries
 * are held in a circular list.  When an entry is 'hit' a reference bit
 * is set.  The least recently used entry is approximated by running a
 * cursor around the list selecting unreferenced entries.  Referenced
 * entries have their reference bit cleared as the cursor passes them.
 */
struct lru_entry {
	struct list_head list;
	atomic_t referenced;
};

struct lru_iter {
	struct lru *lru;
	struct list_head list;
	struct lru_entry *stop;
	struct lru_entry *e;
};

struct lru {
	struct list_head *cursor;
	unsigned long count;

	struct list_head iterators;
};

/*--------------*/

static void lru_init(struct lru *lru)
{
	lru->cursor = NULL;
	lru->count = 0;
	INIT_LIST_HEAD(&lru->iterators);
}

static void lru_destroy(struct lru *lru)
{
	WARN_ON_ONCE(lru->cursor);
	WARN_ON_ONCE(!list_empty(&lru->iterators));
}

/*
 * Insert a new entry into the lru.
 */
static void lru_insert(struct lru *lru, struct lru_entry *le)
{
	/*
	 * Don't be tempted to set to 1, makes the lru aspect
	 * perform poorly.
	 */
	atomic_set(&le->referenced, 0);

	if (lru->cursor) {
		list_add_tail(&le->list, lru->cursor);
	} else {
		INIT_LIST_HEAD(&le->list);
		lru->cursor = &le->list;
	}
	lru->count++;
}

/*--------------*/

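/*
 * Intended lifecycle (sketch only): entries are added with lru_insert(),
 * lru_reference() marks them on every hit, and lru_evict() further down
 * approximates LRU by sweeping the cursor, clearing reference bits and
 * evicting the first unreferenced entry that its predicate accepts.
 */
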
/*
 * Convert a list_head pointer to an lru_entry pointer.
 */
static inline struct lru_entry *to_le(struct list_head *l)
{
	return container_of(l, struct lru_entry, list);
}

/*
 * Initialize an lru_iter and add it to the list of cursors in the lru.
 */
static void lru_iter_begin(struct lru *lru, struct lru_iter *it)
{
	it->lru = lru;
	it->stop = lru->cursor ? to_le(lru->cursor->prev) : NULL;
	it->e = lru->cursor ? to_le(lru->cursor) : NULL;
	list_add(&it->list, &lru->iterators);
}

/*
 * Remove an lru_iter from the list of cursors in the lru.
 */
static inline void lru_iter_end(struct lru_iter *it)
{
	list_del(&it->list);
}

/* Predicate function type to be used with lru_iter_next */
typedef bool (*iter_predicate)(struct lru_entry *le, void *context);

/*
 * Advance the cursor to the next entry that passes the
 * predicate, and return that entry.  Returns NULL if the
 * iteration is complete.
 */
static struct lru_entry *lru_iter_next(struct lru_iter *it,
				       iter_predicate pred, void *context)
{
	struct lru_entry *e;

	while (it->e) {
		e = it->e;

		/* advance the cursor */
		if (it->e == it->stop)
			it->e = NULL;
		else
			it->e = to_le(it->e->list.next);

		if (pred(e, context))
			return e;
	}

	return NULL;
}

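/*
 * A minimal usage sketch (illustration only):
 *
 *	struct lru_iter it;
 *	struct lru_entry *le;
 *
 *	lru_iter_begin(lru, &it);
 *	while ((le = lru_iter_next(&it, pred, context)))
 *		...examine le...
 *	lru_iter_end(&it);
 */
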
/*
 * Invalidate a specific lru_entry and update all cursors in
 * the lru accordingly.
 */
static void lru_iter_invalidate(struct lru *lru, struct lru_entry *e)
{
	struct lru_iter *it;

	list_for_each_entry(it, &lru->iterators, list) {
		/* Move it->e forwards if necessary. */
		if (it->e == e) {
			it->e = to_le(it->e->list.next);
			if (it->e == e)
				it->e = NULL;
		}

		/* Move it->stop backwards if necessary. */
		if (it->stop == e) {
			it->stop = to_le(it->stop->list.prev);
			if (it->stop == e)
				it->stop = NULL;
		}
	}
}

/*--------------*/

/*
 * Remove a specific entry from the lru.
 */
static void lru_remove(struct lru *lru, struct lru_entry *le)
{
	lru_iter_invalidate(lru, le);
	if (lru->count == 1) {
		lru->cursor = NULL;
	} else {
		if (lru->cursor == &le->list)
			lru->cursor = lru->cursor->next;
		list_del(&le->list);
	}
	lru->count--;
}

/*
 * Mark as referenced.
 */
static inline void lru_reference(struct lru_entry *le)
{
	atomic_set(&le->referenced, 1);
}

/*--------------*/

/*
 * Remove the least recently used entry (approximately) that passes the
 * predicate.  Returns NULL on failure.
 */
enum evict_result {
	ER_EVICT,
	ER_DONT_EVICT,
	ER_STOP, /* stop looking for something to evict */
};

typedef enum evict_result (*le_predicate)(struct lru_entry *le, void *context);

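/*
 * A trivial predicate would evict whatever it is offered (hypothetical,
 * for illustration only; nothing below uses it):
 *
 *	static enum evict_result evict_anything(struct lru_entry *le, void *context)
 *	{
 *		return ER_EVICT;
 *	}
 */
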
static struct lru_entry *lru_evict(struct lru *lru, le_predicate pred, void *context, bool no_sleep)
{
	unsigned long tested = 0;
	struct list_head *h = lru->cursor;
	struct lru_entry *le;

	if (!h)
		return NULL;
	/*
	 * In the worst case we have to loop around twice. Once to clear
	 * the reference flags, and then again to discover the predicate
	 * fails for all entries.
	 */
	while (tested < lru->count) {
		le = container_of(h, struct lru_entry, list);

		if (atomic_read(&le->referenced)) {
			atomic_set(&le->referenced, 0);
		} else {
			tested++;
			switch (pred(le, context)) {
			case ER_EVICT:
				/*
				 * Adjust the cursor, so we start the next
				 * search from here.
				 */
				lru->cursor = le->list.next;
				lru_remove(lru, le);
				return le;

			case ER_DONT_EVICT:
				break;

			case ER_STOP:
				lru->cursor = le->list.next;
				return NULL;
			}
		}

		h = h->next;

		if (!no_sleep)
			cond_resched();
	}

	return NULL;
}

/*--------------------------------------------------------------*/

/*
 * Buffer state bits.
 */
#define B_READING	0
#define B_WRITING	1
#define B_DIRTY		2

/*
 * Describes how the block was allocated:
 * kmem_cache_alloc(), __get_free_pages() or vmalloc().
 * See the comment at alloc_buffer_data.
 */
enum data_mode {
	DATA_MODE_SLAB = 0,
	DATA_MODE_GET_FREE_PAGES = 1,
	DATA_MODE_VMALLOC = 2,
	DATA_MODE_LIMIT = 3
};

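/*
 * One cached block.  The B_* bits above are kept in 'state'; hold_count
 * counts active users and must have dropped to zero before the buffer
 * can be evicted.
 */
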
struct dm_buffer {
	/* protected by the locks in dm_buffer_cache */
	struct rb_node node;

	/* immutable, so don't need protecting */
	sector_t block;
	void *data;
	unsigned char data_mode;		/* DATA_MODE_* */

	/*
	 * These two fields are used in isolation, so do not need
	 * a surrounding lock.
	 */
	atomic_t hold_count;
	unsigned long last_accessed;

	/*
	 * Everything else is protected by the mutex in
	 * dm_bufio_client
	 */
	unsigned long state;
	struct lru_entry lru;
	unsigned char list_mode;		/* LIST_* */
	blk_status_t read_error;
	blk_status_t write_error;
	unsigned int dirty_start;
	unsigned int dirty_end;
	unsigned int write_start;
	unsigned int write_end;
	struct list_head write_list;
	struct dm_bufio_client *c;
	void (*end_io)(struct dm_buffer *b, blk_status_t bs);
#ifdef CONFIG_DM_DEBUG_BLOCK_STACK_TRACING
#define MAX_STACK 10
	unsigned int stack_len;
	unsigned long stack_entries[MAX_STACK];
#endif
};

/*--------------------------------------------------------------*/

/*
 * The buffer cache manages buffers, particularly:
 *  - incrementing/decrementing the holder count
 *  - setting the last_accessed field
 *  - maintaining clean/dirty state along with the lru
 *  - selecting buffers that match predicates
 *
 * It does *not* handle:
 *  - allocation/freeing of buffers
 *  - IO
 *  - eviction or cache sizing
 *
 * cache_get() and cache_put() are threadsafe, you do not need to
 * protect these calls with a surrounding mutex.  All the other
 * methods are not threadsafe; they do use locking primitives, but
 * only enough to ensure get/put are threadsafe.
 */

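/*
 * One rb-tree plus its lock.  The union holds either a rw_semaphore (the
 * default) or an rwlock for no_sleep clients; cache_init() initialises
 * whichever member matches bc->no_sleep.
 */
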
struct buffer_tree {
	union {
		struct rw_semaphore lock;
		rwlock_t spinlock;
	} u;
	struct rb_root root;
} ____cacheline_aligned_in_smp;

struct dm_buffer_cache {
	struct lru lru[LIST_SIZE];
	/*
	 * We spread entries across multiple trees to reduce contention
	 * on the locks.
	 */
	unsigned int num_locks;
	bool no_sleep;
	struct buffer_tree trees[];
};

static DEFINE_STATIC_KEY_FALSE(no_sleep_enabled);

static inline unsigned int cache_index(sector_t block, unsigned int num_locks)
{
	return dm_hash_locks_index(block, num_locks);
}

static inline void cache_read_lock(struct dm_buffer_cache *bc, sector_t block)
{
	if (static_branch_unlikely(&no_sleep_enabled) && bc->no_sleep)
		read_lock_bh(&bc->trees[cache_index(block, bc->num_locks)].u.spinlock);
	else
		down_read(&bc->trees[cache_index(block, bc->num_locks)].u.lock);
}

static inline void cache_read_unlock(struct dm_buffer_cache *bc, sector_t block)
{
	if (static_branch_unlikely(&no_sleep_enabled) && bc->no_sleep)
		read_unlock_bh(&bc->trees[cache_index(block, bc->num_locks)].u.spinlock);
	else
		up_read(&bc->trees[cache_index(block, bc->num_locks)].u.lock);
}

static inline void cache_write_lock(struct dm_buffer_cache *bc, sector_t block)
{
	if (static_branch_unlikely(&no_sleep_enabled) && bc->no_sleep)
		write_lock_bh(&bc->trees[cache_index(block, bc->num_locks)].u.spinlock);
	else
		down_write(&bc->trees[cache_index(block, bc->num_locks)].u.lock);
}

static inline void cache_write_unlock(struct dm_buffer_cache *bc, sector_t block)
{
	if (static_branch_unlikely(&no_sleep_enabled) && bc->no_sleep)
		write_unlock_bh(&bc->trees[cache_index(block, bc->num_locks)].u.spinlock);
	else
		up_write(&bc->trees[cache_index(block, bc->num_locks)].u.lock);
}

/*
 * Sometimes we want to repeatedly get and drop locks as part of an iteration.
 * This struct helps avoid redundant drops and gets of the same lock.
 */
struct lock_history {
	struct dm_buffer_cache *cache;
	bool write;
	unsigned int previous;
	unsigned int no_previous;
};

static void lh_init(struct lock_history *lh, struct dm_buffer_cache *cache, bool write)
{
	lh->cache = cache;
	lh->write = write;
	lh->no_previous = cache->num_locks;
	lh->previous = lh->no_previous;
}

static void __lh_lock(struct lock_history *lh, unsigned int index)
{
	if (lh->write) {
		if (static_branch_unlikely(&no_sleep_enabled) && lh->cache->no_sleep)
			write_lock_bh(&lh->cache->trees[index].u.spinlock);
		else
			down_write(&lh->cache->trees[index].u.lock);
	} else {
		if (static_branch_unlikely(&no_sleep_enabled) && lh->cache->no_sleep)
			read_lock_bh(&lh->cache->trees[index].u.spinlock);
		else
			down_read(&lh->cache->trees[index].u.lock);
	}
}

static void __lh_unlock(struct lock_history *lh, unsigned int index)
{
	if (lh->write) {
		if (static_branch_unlikely(&no_sleep_enabled) && lh->cache->no_sleep)
			write_unlock_bh(&lh->cache->trees[index].u.spinlock);
		else
			up_write(&lh->cache->trees[index].u.lock);
	} else {
		if (static_branch_unlikely(&no_sleep_enabled) && lh->cache->no_sleep)
			read_unlock_bh(&lh->cache->trees[index].u.spinlock);
		else
			up_read(&lh->cache->trees[index].u.lock);
	}
}

/*
 * Make sure you call this since it will unlock the final lock.
 */
static void lh_exit(struct lock_history *lh)
{
	if (lh->previous != lh->no_previous) {
		__lh_unlock(lh, lh->previous);
		lh->previous = lh->no_previous;
	}
}

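/*
 * Intended calling pattern (sketch only):
 *
 *	struct lock_history lh;
 *
 *	lh_init(&lh, cache, true);
 *	for each block visited:
 *		lh_next(&lh, block);	// takes (or keeps) the right tree lock
 *		...use the tree that covers 'block'...
 *	lh_exit(&lh);			// drops whatever is still held
 */
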
/*
 * Named 'next' because there is no corresponding
 * 'up/unlock' call since it's done automatically.
 */
static void lh_next(struct lock_history *lh, sector_t b)
{
	unsigned int index = cache_index(b, lh->no_previous); /* no_previous is num_locks */

	if (lh->previous != lh->no_previous) {
		if (lh->previous != index) {
			__lh_unlock(lh, lh->previous);
			__lh_lock(lh, index);
			lh->previous = index;
		}
	} else {
		__lh_lock(lh, index);
		lh->previous = index;
	}
}

static inline struct dm_buffer *le_to_buffer(struct lru_entry *le)
{
	return container_of(le, struct dm_buffer, lru);
}

static struct dm_buffer *list_to_buffer(struct list_head *l)
{
	struct lru_entry *le = list_entry(l, struct lru_entry, list);

	if (!le)
		return NULL;

	return le_to_buffer(le);
}

static void cache_init(struct dm_buffer_cache *bc, unsigned int num_locks, bool no_sleep)
{
	unsigned int i;

	bc->num_locks = num_locks;
	bc->no_sleep = no_sleep;

	for (i = 0; i < bc->num_locks; i++) {
		if (no_sleep)
			rwlock_init(&bc->trees[i].u.spinlock);
		else
			init_rwsem(&bc->trees[i].u.lock);
		bc->trees[i].root = RB_ROOT;
	}

	lru_init(&bc->lru[LIST_CLEAN]);
	lru_init(&bc->lru[LIST_DIRTY]);
}

static void cache_destroy(struct dm_buffer_cache *bc)
{
	unsigned int i;

	for (i = 0; i < bc->num_locks; i++)
		WARN_ON_ONCE(!RB_EMPTY_ROOT(&bc->trees[i].root));

	lru_destroy(&bc->lru[LIST_CLEAN]);
	lru_destroy(&bc->lru[LIST_DIRTY]);
}

/*--------------*/

/*
 * Not threadsafe, or racy depending on how you look at it.
 */
static inline unsigned long cache_count(struct dm_buffer_cache *bc, int list_mode)
{
	return bc->lru[list_mode].count;
}

static inline unsigned long cache_total(struct dm_buffer_cache *bc)
{
	return cache_count(bc, LIST_CLEAN) + cache_count(bc, LIST_DIRTY);
}

/*--------------*/

/*
 * Gets a specific buffer, indexed by block.
 * If the buffer is found then its holder count will be incremented and
 * lru_reference will be called.
 *
 * threadsafe
 */
static struct dm_buffer *__cache_get(const struct rb_root *root, sector_t block)
{
	struct rb_node *n = root->rb_node;
	struct dm_buffer *b;

	while (n) {
		b = container_of(n, struct dm_buffer, node);

		if (b->block == block)
			return b;

		n = block < b->block ? n->rb_left : n->rb_right;
	}

	return NULL;
}

static void __cache_inc_buffer(struct dm_buffer *b)
{
	atomic_inc(&b->hold_count);
	WRITE_ONCE(b->last_accessed, jiffies);
}

static struct dm_buffer *cache_get(struct dm_buffer_cache *bc, sector_t block)
{
	struct dm_buffer *b;

	cache_read_lock(bc, block);
	b = __cache_get(&bc->trees[cache_index(block, bc->num_locks)].root, block);
	if (b) {
		lru_reference(&b->lru);
		__cache_inc_buffer(b);
	}
	cache_read_unlock(bc, block);

	return b;
}

/*--------------*/

/*
 * Returns true if the hold count hits zero.
 * threadsafe
 */
static bool cache_put(struct dm_buffer_cache *bc, struct dm_buffer *b)
{
	bool r;

	cache_read_lock(bc, b->block);
	BUG_ON(!atomic_read(&b->hold_count));
	r = atomic_dec_and_test(&b->hold_count);
	cache_read_unlock(bc, b->block);

	return r;
}

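/*
 * The two calls pair up like a reference count (sketch only):
 *
 *	b = cache_get(bc, block);
 *	if (b) {
 *		...use the buffer...
 *		if (cache_put(bc, b))
 *			...the hold count just dropped to zero...
 *	}
 */
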
/*--------------*/

typedef enum evict_result (*b_predicate)(struct dm_buffer *, void *);

/*
 * Evicts a buffer based on a predicate.  The oldest buffer that
 * matches the predicate will be selected.  In addition to matching
 * the predicate, the selected buffer will have a hold_count of zero.
 */
struct evict_wrapper {
	struct lock_history *lh;
	b_predicate pred;
	void *context;
};

/*
 * Wraps the buffer predicate turning it into an lru predicate.  Adds
 * extra test for hold_count.
 */
static enum evict_result __evict_pred(struct lru_entry *le, void *context)
{
	struct evict_wrapper *w = context;
	struct dm_buffer *b = le_to_buffer(le);

	lh_next(w->lh, b->block);

	if (atomic_read(&b->hold_count))
		return ER_DONT_EVICT;

	return w->pred(b, w->context);
}

static struct dm_buffer *__cache_evict(struct dm_buffer_cache *bc, int list_mode,
				       b_predicate pred, void *context,
				       struct lock_history *lh)
{
	struct evict_wrapper w = {.lh = lh, .pred = pred, .context = context};
	struct lru_entry *le;
	struct dm_buffer *b;

	le = lru_evict(&bc->lru[list_mode], __evict_pred, &w, bc->no_sleep);
	if (!le)
		return NULL;

	b = le_to_buffer(le);
	/* __evict_pred will have locked the appropriate tree. */
	rb_erase(&b->node, &bc->trees[cache_index(b->block, bc->num_locks)].root);

	return b;
}

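/*
 * Convenience wrapper around __cache_evict() that owns the lock_history.
 * Write locks are taken because the victim is unlinked from its tree.
 */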
static struct dm_buffer *cache_evict(struct dm_buffer_cache *bc, int list_mode,
				     b_predicate pred, void *context)
{
	struct dm_buffer *b;
	struct lock_history lh;

	lh_init(&lh, bc, true);
	b = __cache_evict(bc, list_mode, pred, context, &lh);
	lh_exit(&lh);

	return b;
}

/*--------------*/

/*
 * Mark a buffer as clean or dirty.  Not threadsafe.
 */
static void cache_mark(struct dm_buffer_cache *bc, struct dm_buffer *b, int list_mode)
{
	cache_write_lock(bc, b->block);
	if (list_mode != b->list_mode) {
		lru_remove(&bc->lru[b->list_mode], &b->lru);
		b->list_mode = list_mode;
		lru_insert(&bc->lru[b->list_mode], &b->lru);
	}
	cache_write_unlock(bc, b->block);
}

/*--------------*/

/*
 * Runs through the lru associated with 'old_mode'; buffers that match the
 * predicate are moved to 'new_mode'.  Not threadsafe.
 */
static void __cache_mark_many(struct dm_buffer_cache *bc, int old_mode, int new_mode,
			      b_predicate pred, void *context, struct lock_history *lh)
{
	struct lru_entry *le;
	struct dm_buffer *b;
	struct evict_wrapper w = {.lh = lh, .pred = pred, .context = context};

	while (true) {
		le = lru_evict(&bc->lru[old_mode], __evict_pred, &w, bc->no_sleep);
		if (!le)
			break;

		b = le_to_buffer(le);
		b->list_mode = new_mode;
		lru_insert(&bc->lru[b->list_mode], &b->lru);
	}
}

static void cache_mark_many(struct dm_buffer_cache *bc, int old_mode, int new_mode,
			    b_predicate pred, void *context)
{
	struct lock_history lh;

	lh_init(&lh, bc, true);
	__cache_mark_many(bc, old_mode, new_mode, pred, context, &lh);
	lh_exit(&lh);
}

/*--------------*/

/*
 * Iterates through all clean or dirty entries calling a function for each
 * entry.  The callback may terminate the iteration early.  Not threadsafe.
 */

/*
 * Iterator functions should return one of these actions to indicate
 * how the iteration should proceed.
 */
enum it_action {
	IT_NEXT,
	IT_COMPLETE,
};

typedef enum it_action (*iter_fn)(struct dm_buffer *b, void *context);

static void __cache_iterate(struct dm_buffer_cache *bc, int list_mode,
			    iter_fn fn, void *context, struct lock_history *lh)
{
	struct lru *lru = &bc->lru[list_mode];
	struct lru_entry *le, *first;

	if (!lru->cursor)
		return;

	first = le = to_le(lru->cursor);
	do {
		struct dm_buffer *b = le_to_buffer(le);

		lh_next(lh, b->block);

		switch (fn(b, context)) {
		case IT_NEXT:
			break;

		case IT_COMPLETE:
			return;
		}
		cond_resched();

		le = to_le(le->list.next);
	} while (le != first);
}

static void cache_iterate(struct dm_buffer_cache *bc, int list_mode,
			  iter_fn fn, void *context)
{
	struct lock_history lh;

	lh_init(&lh, bc, false);
	__cache_iterate(bc, list_mode, fn, context, &lh);
	lh_exit(&lh);
}

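/*
 * Example callback shape (hypothetical, for illustration only):
 *
 *	static enum it_action count_buffers(struct dm_buffer *b, void *context)
 *	{
 *		unsigned long *nr = context;
 *
 *		(*nr)++;
 *		return IT_NEXT;
 *	}
 *
 * driven with cache_iterate(bc, LIST_DIRTY, count_buffers, &nr).
 */
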
/*--------------*/

/*
 * Passes ownership of the buffer to the cache.  Returns false if the
 * buffer was already present (in which case ownership does not pass),
 * e.g. after a race with another thread.
 *
 * Holder count should be 1 on insertion.
 *
 * Not threadsafe.
 */
static bool __cache_insert(struct rb_root *root, struct dm_buffer *b)
{
	struct rb_node **new = &root->rb_node, *parent = NULL;
	struct dm_buffer *found;

	while (*new) {
		found = container_of(*new, struct dm_buffer, node);

		if (found->block == b->block)
			return false;

		parent = *new;
		new = b->block < found->block ?
			&found->node.rb_left : &found->node.rb_right;
	}

	rb_link_node(&b->node, parent, new);
	rb_insert_color(&b->node, root);

	return true;
}

static bool cache_insert(struct dm_buffer_cache *bc, struct dm_buffer *b)
{
	bool r;

	if (WARN_ON_ONCE(b->list_mode >= LIST_SIZE))
		return false;

	cache_write_lock(bc, b->block);
	BUG_ON(atomic_read(&b->hold_count) != 1);
	r = __cache_insert(&bc->trees[cache_index(b->block, bc->num_locks)].root, b);
	if (r)
		lru_insert(&bc->lru[b->list_mode], &b->lru);
	cache_write_unlock(bc, b->block);

	return r;
}

/*--------------*/

/*
 * Removes buffer from cache, ownership of the buffer passes back to the caller.
 * Fails if the hold_count is not one (i.e. the caller holds the only reference).
 *
 * Not threadsafe.
 */
static bool cache_remove(struct dm_buffer_cache *bc, struct dm_buffer *b)
{
	bool r;

	cache_write_lock(bc, b->block);

	if (atomic_read(&b->hold_count) != 1) {
		r = false;
	} else {
		r = true;
		rb_erase(&b->node, &bc->trees[cache_index(b->block, bc->num_locks)].root);
		lru_remove(&bc->lru[b->list_mode], &b->lru);
	}

	cache_write_unlock(bc, b->block);

	return r;
}

/*--------------*/

typedef void (*b_release)(struct dm_buffer *);

static struct dm_buffer *__find_next(struct rb_root *root, sector_t block)
{
	struct rb_node *n = root->rb_node;
	struct dm_buffer *b;
	struct dm_buffer *best = NULL;

	while (n) {
		b = container_of(n, struct dm_buffer, node);

		if (b->block == block)
			return b;

		if (block <= b->block) {
			n = n->rb_left;
			best = b;
		} else {
			n = n->rb_right;
		}
	}

	return best;
}

static void __remove_range(struct dm_buffer_cache *bc,
			   struct rb_root *root,
			   sector_t begin, sector_t end,
			   b_predicate pred, b_release release)
{
	struct dm_buffer *b;

	while (true) {
		cond_resched();

		b = __find_next(root, begin);
		if (!b || (b->block >= end))
			break;

		begin = b->block + 1;

		if (atomic_read(&b->hold_count))
			continue;

		if (pred(b, NULL) == ER_EVICT) {
			rb_erase(&b->node, root);
			lru_remove(&bc->lru[b->list_mode], &b->lru);
			release(b);
		}
	}
}

static void cache_remove_range(struct dm_buffer_cache *bc,
			       sector_t begin, sector_t end,
			       b_predicate pred, b_release release)
{
	unsigned int i;

	BUG_ON(bc->no_sleep);
	for (i = 0; i < bc->num_locks; i++) {
		down_write(&bc->trees[i].u.lock);
		__remove_range(bc, &bc->trees[i].root, begin, end, pred, release);
		up_write(&bc->trees[i].u.lock);
	}
}

/*----------------------------------------------------------------*/

/*
 * Linking of buffers:
 *	All buffers are linked to the buffer cache with their node field.
 *
 *	Clean buffers that are not being written (B_WRITING not set)
 *	are linked to lru[LIST_CLEAN] with their lru field.
 *
 *	Dirty and clean buffers that are being written are linked to
 *	lru[LIST_DIRTY] with their lru field.  When the write finishes,
 *	the buffer cannot be relinked immediately (because we are in an
 *	interrupt context and relinking requires process context), so some
 *	clean-not-writing buffers can be held on the dirty lru too.  They
 *	are later added to the clean lru in process context.
 */
struct dm_bufio_client {
	struct block_device *bdev;
	unsigned int block_size;
	s8 sectors_per_block_bits;

	bool no_sleep;
	struct mutex lock;
	spinlock_t spinlock;

	int async_write_error;

	void (*alloc_callback)(struct dm_buffer *buf);
	void (*write_callback)(struct dm_buffer *buf);
	struct kmem_cache *slab_buffer;
	struct kmem_cache *slab_cache;
	struct dm_io_client *dm_io;

	struct list_head reserved_buffers;
	unsigned int need_reserved_buffers;

	unsigned int minimum_buffers;

	sector_t start;

	struct shrinker shrinker;
	struct work_struct shrink_work;
	atomic_long_t need_shrink;

	wait_queue_head_t free_buffer_wait;

	struct list_head client_list;

	/*
	 * Used by global_cleanup to sort the clients list.
	 */
	unsigned long oldest_buffer;

	struct dm_buffer_cache cache; /* must be last member */
};

/*----------------------------------------------------------------*/

#define dm_bufio_in_request()	(!!current->bio_list)

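/*
 * Client-wide lock.  no_sleep clients take the spinlock with bottom halves
 * disabled; everyone else takes the mutex.  The dm_bufio_in_request() value
 * is used as the lockdep subclass, so acquisitions made from within bio
 * submission (current->bio_list set) are classed separately.
 */
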
static void dm_bufio_lock(struct dm_bufio_client *c)
{
	if (static_branch_unlikely(&no_sleep_enabled) && c->no_sleep)
		spin_lock_bh(&c->spinlock);
	else
		mutex_lock_nested(&c->lock, dm_bufio_in_request());
}

static void dm_bufio_unlock(struct dm_bufio_client *c)
{
	if (static_branch_unlikely(&no_sleep_enabled) && c->no_sleep)
		spin_unlock_bh(&c->spinlock);
	else
		mutex_unlock(&c->lock);
}

/*----------------------------------------------------------------*/

/*
 * Default cache size: available memory divided by the ratio.
 */
static unsigned long dm_bufio_default_cache_size;

/*
 * Total cache size set by the user.
 */
static unsigned long dm_bufio_cache_size;

/*
 * A copy of dm_bufio_cache_size because dm_bufio_cache_size can change
 * at any time.  If it disagrees, the user has changed cache size.
 */
static unsigned long dm_bufio_cache_size_latch;

static DEFINE_SPINLOCK(global_spinlock);

/*
 * Buffers are freed after this timeout
 */
static unsigned int dm_bufio_max_age = DM_BUFIO_DEFAULT_AGE_SECS;
static unsigned long dm_bufio_retain_bytes = DM_BUFIO_DEFAULT_RETAIN_BYTES;

static unsigned long dm_bufio_peak_allocated;
static unsigned long dm_bufio_allocated_kmem_cache;
static unsigned long dm_bufio_allocated_get_free_pages;
static unsigned long dm_bufio_allocated_vmalloc;
static unsigned long dm_bufio_current_allocated;

/*----------------------------------------------------------------*/

/*
 * The current number of clients.
 */
static int dm_bufio_client_count;

/*
 * The list of all clients.
 */
static LIST_HEAD(dm_bufio_all_clients);

/*
 * This mutex protects dm_bufio_cache_size_latch and dm_bufio_client_count
 */
static DEFINE_MUTEX(dm_bufio_clients_lock);

static struct workqueue_struct *dm_bufio_wq;
static struct delayed_work dm_bufio_cleanup_old_work;
static struct work_struct dm_bufio_replacement_work;

#ifdef CONFIG_DM_DEBUG_BLOCK_STACK_TRACING
static void buffer_record_stack(struct dm_buffer *b)
{
	b->stack_len = stack_trace_save(b->stack_entries, MAX_STACK, 2);
}
#endif

/*----------------------------------------------------------------*/

static void adjust_total_allocated(struct dm_buffer *b, bool unlink)
{
	unsigned char data_mode;
	long diff;

	static unsigned long * const class_ptr[DATA_MODE_LIMIT] = {
		&dm_bufio_allocated_kmem_cache,
		&dm_bufio_allocated_get_free_pages,
		&dm_bufio_allocated_vmalloc,
	};

	data_mode = b->data_mode;
	diff = (long)b->c->block_size;
	if (unlink)
		diff = -diff;

	spin_lock(&global_spinlock);

	*class_ptr[data_mode] += diff;

	dm_bufio_current_allocated += diff;

	if (dm_bufio_current_allocated > dm_bufio_peak_allocated)
		dm_bufio_peak_allocated = dm_bufio_current_allocated;

	if (!unlink) {
		if (dm_bufio_current_allocated > dm_bufio_cache_size)
			queue_work(dm_bufio_wq, &dm_bufio_replacement_work);
	}

	spin_unlock(&global_spinlock);
}

/*
 * Change the number of clients and recalculate per-client limit.
 */
static void __cache_size_refresh(void)
{
	if (WARN_ON(!mutex_is_locked(&dm_bufio_clients_lock)))
		return;
	if (WARN_ON(dm_bufio_client_count < 0))
		return;

	dm_bufio_cache_size_latch = READ_ONCE(dm_bufio_cache_size);

	/*
	 * Use default if set to 0 and report the actual cache size used.
	 */
	if (!dm_bufio_cache_size_latch) {
		(void)cmpxchg(&dm_bufio_cache_size, 0,
			      dm_bufio_default_cache_size);
		dm_bufio_cache_size_latch = dm_bufio_default_cache_size;
	}
}

/*
 * Allocating buffer data.
 *
 * Small buffers are allocated with kmem_cache, to use space optimally.
 *
 * For large buffers, we choose between get_free_pages and vmalloc.
 * Each has advantages and disadvantages.
 *
 * __get_free_pages can randomly fail if the memory is fragmented.
 * __vmalloc won't randomly fail, but vmalloc space is limited (it may be
 * as low as 128M) so using it for caching is not appropriate.
 *
 * If the allocation may fail we use __get_free_pages.  Memory fragmentation
 * won't have a fatal effect here, it just causes some other buffers to be
 * flushed and more I/O to be performed.  Don't use __get_free_pages if it
 * always fails (i.e. order > MAX_ORDER).
 *
 * If the allocation shouldn't fail we use __vmalloc.  This is only for the
 * initial reserve allocation, so there's no risk of wasting all vmalloc
 * space.
 */

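/*
 * Summary of the decision below (illustrative):
 *
 *	c->slab_cache set (small blocks)		-> kmem_cache_alloc()
 *	block_size <= KMALLOC_MAX_SIZE, __GFP_NORETRY	-> __get_free_pages()
 *	otherwise					-> __vmalloc()
 */
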
117862306a36Sopenharmony_ci */ 117962306a36Sopenharmony_cistatic void *alloc_buffer_data(struct dm_bufio_client *c, gfp_t gfp_mask, 118062306a36Sopenharmony_ci unsigned char *data_mode) 118162306a36Sopenharmony_ci{ 118262306a36Sopenharmony_ci if (unlikely(c->slab_cache != NULL)) { 118362306a36Sopenharmony_ci *data_mode = DATA_MODE_SLAB; 118462306a36Sopenharmony_ci return kmem_cache_alloc(c->slab_cache, gfp_mask); 118562306a36Sopenharmony_ci } 118662306a36Sopenharmony_ci 118762306a36Sopenharmony_ci if (c->block_size <= KMALLOC_MAX_SIZE && 118862306a36Sopenharmony_ci gfp_mask & __GFP_NORETRY) { 118962306a36Sopenharmony_ci *data_mode = DATA_MODE_GET_FREE_PAGES; 119062306a36Sopenharmony_ci return (void *)__get_free_pages(gfp_mask, 119162306a36Sopenharmony_ci c->sectors_per_block_bits - (PAGE_SHIFT - SECTOR_SHIFT)); 119262306a36Sopenharmony_ci } 119362306a36Sopenharmony_ci 119462306a36Sopenharmony_ci *data_mode = DATA_MODE_VMALLOC; 119562306a36Sopenharmony_ci 119662306a36Sopenharmony_ci return __vmalloc(c->block_size, gfp_mask); 119762306a36Sopenharmony_ci} 119862306a36Sopenharmony_ci 119962306a36Sopenharmony_ci/* 120062306a36Sopenharmony_ci * Free buffer's data. 120162306a36Sopenharmony_ci */ 120262306a36Sopenharmony_cistatic void free_buffer_data(struct dm_bufio_client *c, 120362306a36Sopenharmony_ci void *data, unsigned char data_mode) 120462306a36Sopenharmony_ci{ 120562306a36Sopenharmony_ci switch (data_mode) { 120662306a36Sopenharmony_ci case DATA_MODE_SLAB: 120762306a36Sopenharmony_ci kmem_cache_free(c->slab_cache, data); 120862306a36Sopenharmony_ci break; 120962306a36Sopenharmony_ci 121062306a36Sopenharmony_ci case DATA_MODE_GET_FREE_PAGES: 121162306a36Sopenharmony_ci free_pages((unsigned long)data, 121262306a36Sopenharmony_ci c->sectors_per_block_bits - (PAGE_SHIFT - SECTOR_SHIFT)); 121362306a36Sopenharmony_ci break; 121462306a36Sopenharmony_ci 121562306a36Sopenharmony_ci case DATA_MODE_VMALLOC: 121662306a36Sopenharmony_ci vfree(data); 121762306a36Sopenharmony_ci break; 121862306a36Sopenharmony_ci 121962306a36Sopenharmony_ci default: 122062306a36Sopenharmony_ci DMCRIT("dm_bufio_free_buffer_data: bad data mode: %d", 122162306a36Sopenharmony_ci data_mode); 122262306a36Sopenharmony_ci BUG(); 122362306a36Sopenharmony_ci } 122462306a36Sopenharmony_ci} 122562306a36Sopenharmony_ci 122662306a36Sopenharmony_ci/* 122762306a36Sopenharmony_ci * Allocate buffer and its data. 122862306a36Sopenharmony_ci */ 122962306a36Sopenharmony_cistatic struct dm_buffer *alloc_buffer(struct dm_bufio_client *c, gfp_t gfp_mask) 123062306a36Sopenharmony_ci{ 123162306a36Sopenharmony_ci struct dm_buffer *b = kmem_cache_alloc(c->slab_buffer, gfp_mask); 123262306a36Sopenharmony_ci 123362306a36Sopenharmony_ci if (!b) 123462306a36Sopenharmony_ci return NULL; 123562306a36Sopenharmony_ci 123662306a36Sopenharmony_ci b->c = c; 123762306a36Sopenharmony_ci 123862306a36Sopenharmony_ci b->data = alloc_buffer_data(c, gfp_mask, &b->data_mode); 123962306a36Sopenharmony_ci if (!b->data) { 124062306a36Sopenharmony_ci kmem_cache_free(c->slab_buffer, b); 124162306a36Sopenharmony_ci return NULL; 124262306a36Sopenharmony_ci } 124362306a36Sopenharmony_ci adjust_total_allocated(b, false); 124462306a36Sopenharmony_ci 124562306a36Sopenharmony_ci#ifdef CONFIG_DM_DEBUG_BLOCK_STACK_TRACING 124662306a36Sopenharmony_ci b->stack_len = 0; 124762306a36Sopenharmony_ci#endif 124862306a36Sopenharmony_ci return b; 124962306a36Sopenharmony_ci} 125062306a36Sopenharmony_ci 125162306a36Sopenharmony_ci/* 125262306a36Sopenharmony_ci * Free buffer and its data. 
125362306a36Sopenharmony_ci */ 125462306a36Sopenharmony_cistatic void free_buffer(struct dm_buffer *b) 125562306a36Sopenharmony_ci{ 125662306a36Sopenharmony_ci struct dm_bufio_client *c = b->c; 125762306a36Sopenharmony_ci 125862306a36Sopenharmony_ci adjust_total_allocated(b, true); 125962306a36Sopenharmony_ci free_buffer_data(c, b->data, b->data_mode); 126062306a36Sopenharmony_ci kmem_cache_free(c->slab_buffer, b); 126162306a36Sopenharmony_ci} 126262306a36Sopenharmony_ci 126362306a36Sopenharmony_ci/* 126462306a36Sopenharmony_ci *-------------------------------------------------------------------------- 126562306a36Sopenharmony_ci * Submit I/O on the buffer. 126662306a36Sopenharmony_ci * 126762306a36Sopenharmony_ci * Bio interface is faster but it has some problems: 126862306a36Sopenharmony_ci * the vector list is limited (increasing this limit increases 126962306a36Sopenharmony_ci * memory-consumption per buffer, so it is not viable); 127062306a36Sopenharmony_ci * 127162306a36Sopenharmony_ci * the memory must be direct-mapped, not vmalloced; 127262306a36Sopenharmony_ci * 127362306a36Sopenharmony_ci * If the buffer is small enough (up to DM_BUFIO_INLINE_VECS pages) and 127462306a36Sopenharmony_ci * it is not vmalloced, try using the bio interface. 127562306a36Sopenharmony_ci * 127662306a36Sopenharmony_ci * If the buffer is big, if it is vmalloced or if the underlying device 127762306a36Sopenharmony_ci * rejects the bio because it is too large, use dm-io layer to do the I/O. 127862306a36Sopenharmony_ci * The dm-io layer splits the I/O into multiple requests, avoiding the above 127962306a36Sopenharmony_ci * shortcomings. 128062306a36Sopenharmony_ci *-------------------------------------------------------------------------- 128162306a36Sopenharmony_ci */ 128262306a36Sopenharmony_ci 128362306a36Sopenharmony_ci/* 128462306a36Sopenharmony_ci * dm-io completion routine. It just calls b->bio.bi_end_io, pretending 128562306a36Sopenharmony_ci * that the request was handled directly with bio interface. 128662306a36Sopenharmony_ci */ 128762306a36Sopenharmony_cistatic void dmio_complete(unsigned long error, void *context) 128862306a36Sopenharmony_ci{ 128962306a36Sopenharmony_ci struct dm_buffer *b = context; 129062306a36Sopenharmony_ci 129162306a36Sopenharmony_ci b->end_io(b, unlikely(error != 0) ? 
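/*
 * Issue the I/O through the dm-io layer.  This is the fallback path for
 * vmalloc()-ed buffers and for the case when a bio cannot be allocated;
 * dm-io takes care of splitting the request as needed.
 */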
static void use_dmio(struct dm_buffer *b, enum req_op op, sector_t sector,
		     unsigned int n_sectors, unsigned int offset)
{
	int r;
	struct dm_io_request io_req = {
		.bi_opf = op,
		.notify.fn = dmio_complete,
		.notify.context = b,
		.client = b->c->dm_io,
	};
	struct dm_io_region region = {
		.bdev = b->c->bdev,
		.sector = sector,
		.count = n_sectors,
	};

	if (b->data_mode != DATA_MODE_VMALLOC) {
		io_req.mem.type = DM_IO_KMEM;
		io_req.mem.ptr.addr = (char *)b->data + offset;
	} else {
		io_req.mem.type = DM_IO_VMA;
		io_req.mem.ptr.vma = (char *)b->data + offset;
	}

	r = dm_io(&io_req, 1, &region, NULL, IOPRIO_DEFAULT);
	if (unlikely(r))
		b->end_io(b, errno_to_blk_status(r));
}

static void bio_complete(struct bio *bio)
{
	struct dm_buffer *b = bio->bi_private;
	blk_status_t status = bio->bi_status;

	bio_uninit(bio);
	kfree(bio);
	b->end_io(b, status);
}

static void use_bio(struct dm_buffer *b, enum req_op op, sector_t sector,
		    unsigned int n_sectors, unsigned int offset)
{
	struct bio *bio;
	char *ptr;
	unsigned int len;

	bio = bio_kmalloc(1, GFP_NOWAIT | __GFP_NORETRY | __GFP_NOWARN);
	if (!bio) {
		use_dmio(b, op, sector, n_sectors, offset);
		return;
	}
	bio_init(bio, b->c->bdev, bio->bi_inline_vecs, 1, op);
	bio->bi_iter.bi_sector = sector;
	bio->bi_end_io = bio_complete;
	bio->bi_private = b;

	ptr = (char *)b->data + offset;
	len = n_sectors << SECTOR_SHIFT;

	__bio_add_page(bio, virt_to_page(ptr), len, offset_in_page(ptr));

	submit_bio(bio);
}
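/*
 * Convert a block number to the first sector of that block on the device.
 * For power-of-two block sizes this is a shift; e.g. with 4KiB blocks
 * (sectors_per_block_bits == 3), block 10 starts at sector (10 << 3) = 80,
 * plus the client's start offset.  Otherwise fall back to a multiplication.
 */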
static inline sector_t block_to_sector(struct dm_bufio_client *c, sector_t block)
{
	sector_t sector;

	if (likely(c->sectors_per_block_bits >= 0))
		sector = block << c->sectors_per_block_bits;
	else
		sector = block * (c->block_size >> SECTOR_SHIFT);
	sector += c->start;

	return sector;
}

/*
 * Submit I/O for the buffer.  Reads always cover the whole block; writes
 * are trimmed to the dirty range, rounded out to DM_BUFIO_WRITE_ALIGN
 * boundaries.  Buffers that are not vmalloc()-ed go through the bio
 * interface, everything else through dm-io.
 */
static void submit_io(struct dm_buffer *b, enum req_op op,
		      void (*end_io)(struct dm_buffer *, blk_status_t))
{
	unsigned int n_sectors;
	sector_t sector;
	unsigned int offset, end;

	b->end_io = end_io;

	sector = block_to_sector(b->c, b->block);

	if (op != REQ_OP_WRITE) {
		n_sectors = b->c->block_size >> SECTOR_SHIFT;
		offset = 0;
	} else {
		if (b->c->write_callback)
			b->c->write_callback(b);
		offset = b->write_start;
		end = b->write_end;
		offset &= -DM_BUFIO_WRITE_ALIGN;
		end += DM_BUFIO_WRITE_ALIGN - 1;
		end &= -DM_BUFIO_WRITE_ALIGN;
		if (unlikely(end > b->c->block_size))
			end = b->c->block_size;

		sector += offset >> SECTOR_SHIFT;
		n_sectors = (end - offset) >> SECTOR_SHIFT;
	}

	if (b->data_mode != DATA_MODE_VMALLOC)
		use_bio(b, op, sector, n_sectors, offset);
	else
		use_dmio(b, op, sector, n_sectors, offset);
}

/*
 *--------------------------------------------------------------
 * Writing dirty buffers
 *--------------------------------------------------------------
 */

/*
 * The endio routine for write.
 *
 * Set the error, clear B_WRITING bit and wake anyone who was waiting on
 * it.
 */
static void write_endio(struct dm_buffer *b, blk_status_t status)
{
	b->write_error = status;
	if (unlikely(status)) {
		struct dm_bufio_client *c = b->c;

		(void)cmpxchg(&c->async_write_error, 0,
			      blk_status_to_errno(status));
	}

	BUG_ON(!test_bit(B_WRITING, &b->state));

	smp_mb__before_atomic();
	clear_bit(B_WRITING, &b->state);
	smp_mb__after_atomic();

	wake_up_bit(&b->state, B_WRITING);
}

/*
 * Initiate a write on a dirty buffer, but don't wait for it.
 *
 * - If the buffer is not dirty, exit.
 * - If there is a previous write going on, wait for it to finish (we can't
 *   have two writes on the same buffer simultaneously).
 * - Submit our write and don't wait on it. We set B_WRITING indicating
 *   that there is a write in progress.
 */
static void __write_dirty_buffer(struct dm_buffer *b,
				 struct list_head *write_list)
{
	if (!test_bit(B_DIRTY, &b->state))
		return;

	clear_bit(B_DIRTY, &b->state);
	wait_on_bit_lock_io(&b->state, B_WRITING, TASK_UNINTERRUPTIBLE);

	b->write_start = b->dirty_start;
	b->write_end = b->dirty_end;

	if (!write_list)
		submit_io(b, REQ_OP_WRITE, write_endio);
	else
		list_add_tail(&b->write_list, write_list);
}

/*
 * Submit all writes that were deferred onto a write list, under a single
 * block-layer plug so that they can be merged.
 */
static void __flush_write_list(struct list_head *write_list)
{
	struct blk_plug plug;

	blk_start_plug(&plug);
	while (!list_empty(write_list)) {
		struct dm_buffer *b =
			list_entry(write_list->next, struct dm_buffer, write_list);
		list_del(&b->write_list);
		submit_io(b, REQ_OP_WRITE, write_endio);
		cond_resched();
	}
	blk_finish_plug(&plug);
}
/*
 * Wait until any activity on the buffer finishes.  Possibly write the
 * buffer if it is dirty.  When this function finishes, there is no I/O
 * running on the buffer and the buffer is not dirty.
 */
static void __make_buffer_clean(struct dm_buffer *b)
{
	BUG_ON(atomic_read(&b->hold_count));

	/* smp_load_acquire() pairs with read_endio()'s smp_mb__before_atomic() */
	if (!smp_load_acquire(&b->state))	/* fast case */
		return;

	wait_on_bit_io(&b->state, B_READING, TASK_UNINTERRUPTIBLE);
	__write_dirty_buffer(b, NULL);
	wait_on_bit_io(&b->state, B_WRITING, TASK_UNINTERRUPTIBLE);
}

static enum evict_result is_clean(struct dm_buffer *b, void *context)
{
	struct dm_bufio_client *c = context;

	/* These should never happen */
	if (WARN_ON_ONCE(test_bit(B_WRITING, &b->state)))
		return ER_DONT_EVICT;
	if (WARN_ON_ONCE(test_bit(B_DIRTY, &b->state)))
		return ER_DONT_EVICT;
	if (WARN_ON_ONCE(b->list_mode != LIST_CLEAN))
		return ER_DONT_EVICT;

	if (static_branch_unlikely(&no_sleep_enabled) && c->no_sleep &&
	    unlikely(test_bit(B_READING, &b->state)))
		return ER_DONT_EVICT;

	return ER_EVICT;
}

static enum evict_result is_dirty(struct dm_buffer *b, void *context)
{
	/* These should never happen */
	if (WARN_ON_ONCE(test_bit(B_READING, &b->state)))
		return ER_DONT_EVICT;
	if (WARN_ON_ONCE(b->list_mode != LIST_DIRTY))
		return ER_DONT_EVICT;

	return ER_EVICT;
}

/*
 * Find some buffer that is not held by anybody, clean it, unlink it and
 * return it.
 */
static struct dm_buffer *__get_unclaimed_buffer(struct dm_bufio_client *c)
{
	struct dm_buffer *b;

	b = cache_evict(&c->cache, LIST_CLEAN, is_clean, c);
	if (b) {
		/* this also waits for pending reads */
		__make_buffer_clean(b);
		return b;
	}

	if (static_branch_unlikely(&no_sleep_enabled) && c->no_sleep)
		return NULL;

	b = cache_evict(&c->cache, LIST_DIRTY, is_dirty, NULL);
	if (b) {
		__make_buffer_clean(b);
		return b;
	}

	return NULL;
}

/*
 * Wait until some other threads free some buffer or release hold count on
 * some buffer.
 *
 * This function is entered with c->lock held, drops it and regains it
 * before exiting.
 */
static void __wait_for_free_buffer(struct dm_bufio_client *c)
{
	DECLARE_WAITQUEUE(wait, current);

	add_wait_queue(&c->free_buffer_wait, &wait);
	set_current_state(TASK_UNINTERRUPTIBLE);
	dm_bufio_unlock(c);

	/*
	 * It's possible to miss a wake up event since we don't always
	 * hold c->lock when wake_up is called.  So we have a timeout here,
	 * just in case.
	 */
	io_schedule_timeout(5 * HZ);

	remove_wait_queue(&c->free_buffer_wait, &wait);

	dm_bufio_lock(c);
}
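/*
 * How a buffer is being requested:
 *   NF_FRESH    - the caller will overwrite the whole block, so don't
 *                 read it from disk.
 *   NF_READ     - read the block from disk and wait for the read.
 *   NF_GET      - only return the buffer if it is already cached.
 *   NF_PREFETCH - start a read but don't wait for it.
 */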
enum new_flag {
	NF_FRESH = 0,
	NF_READ = 1,
	NF_GET = 2,
	NF_PREFETCH = 3
};

/*
 * Allocate a new buffer. If the allocation is not possible, wait until
 * some other thread frees a buffer.
 *
 * May drop the lock and regain it.
 */
static struct dm_buffer *__alloc_buffer_wait_no_callback(struct dm_bufio_client *c, enum new_flag nf)
{
	struct dm_buffer *b;
	bool tried_noio_alloc = false;

	/*
	 * dm-bufio is resistant to allocation failures (it just keeps
	 * one buffer reserved in case all the allocations fail).
	 * So set flags to not try too hard:
	 *	GFP_NOWAIT: don't wait; if we need to sleep we'll release our
	 *		    mutex and wait ourselves.
	 *	__GFP_NORETRY: don't retry and rather return failure
	 *	__GFP_NOMEMALLOC: don't use emergency reserves
	 *	__GFP_NOWARN: don't print a warning in case of failure
	 *
	 * For debugging, if we set the cache size to 1, no new buffers will
	 * be allocated.
	 */
	while (1) {
		if (dm_bufio_cache_size_latch != 1) {
			b = alloc_buffer(c, GFP_NOWAIT | __GFP_NORETRY | __GFP_NOMEMALLOC | __GFP_NOWARN);
			if (b)
				return b;
		}

		if (nf == NF_PREFETCH)
			return NULL;

		if (dm_bufio_cache_size_latch != 1 && !tried_noio_alloc) {
			dm_bufio_unlock(c);
			b = alloc_buffer(c, GFP_NOIO | __GFP_NORETRY | __GFP_NOMEMALLOC | __GFP_NOWARN);
			dm_bufio_lock(c);
			if (b)
				return b;
			tried_noio_alloc = true;
		}

		if (!list_empty(&c->reserved_buffers)) {
			b = list_to_buffer(c->reserved_buffers.next);
			list_del(&b->lru.list);
			c->need_reserved_buffers++;

			return b;
		}

		b = __get_unclaimed_buffer(c);
		if (b)
			return b;

		__wait_for_free_buffer(c);
	}
}

static struct dm_buffer *__alloc_buffer_wait(struct dm_bufio_client *c, enum new_flag nf)
{
	struct dm_buffer *b = __alloc_buffer_wait_no_callback(c, nf);

	if (!b)
		return NULL;

	if (c->alloc_callback)
		c->alloc_callback(b);

	return b;
}

/*
 * Free a buffer and wake other threads waiting for free buffers.
 */
static void __free_buffer_wake(struct dm_buffer *b)
{
	struct dm_bufio_client *c = b->c;

	b->block = -1;
	if (!c->need_reserved_buffers)
		free_buffer(b);
	else {
		list_add(&b->lru.list, &c->reserved_buffers);
		c->need_reserved_buffers--;
	}

	/*
	 * We hold the bufio lock here, so no one can add entries to the
	 * wait queue anyway.
	 */
	if (unlikely(waitqueue_active(&c->free_buffer_wait)))
		wake_up(&c->free_buffer_wait);
}

static enum evict_result cleaned(struct dm_buffer *b, void *context)
{
	if (WARN_ON_ONCE(test_bit(B_READING, &b->state)))
		return ER_DONT_EVICT; /* should never happen */

	if (test_bit(B_DIRTY, &b->state) || test_bit(B_WRITING, &b->state))
		return ER_DONT_EVICT;
	else
		return ER_EVICT;
}

static void __move_clean_buffers(struct dm_bufio_client *c)
{
	cache_mark_many(&c->cache, LIST_DIRTY, LIST_CLEAN, cleaned, NULL);
}

struct write_context {
	int no_wait;
	struct list_head *write_list;
};

static enum it_action write_one(struct dm_buffer *b, void *context)
{
	struct write_context *wc = context;

	if (wc->no_wait && test_bit(B_WRITING, &b->state))
		return IT_COMPLETE;

	__write_dirty_buffer(b, wc->write_list);
	return IT_NEXT;
}

static void __write_dirty_buffers_async(struct dm_bufio_client *c, int no_wait,
					struct list_head *write_list)
{
	struct write_context wc = {.no_wait = no_wait, .write_list = write_list};

	__move_clean_buffers(c);
	cache_iterate(&c->cache, LIST_DIRTY, write_one, &wc);
}

/*
 * Check if we're over the watermark: if there are many more dirty buffers
 * than clean ones (DM_BUFIO_WRITEBACK_RATIO times as many), start writing
 * them back asynchronously.
 */
static void __check_watermark(struct dm_bufio_client *c,
			      struct list_head *write_list)
{
	if (cache_count(&c->cache, LIST_DIRTY) >
	    cache_count(&c->cache, LIST_CLEAN) * DM_BUFIO_WRITEBACK_RATIO)
		__write_dirty_buffers_async(c, 1, write_list);
}

/*
 *--------------------------------------------------------------
 * Getting a buffer
 *--------------------------------------------------------------
 */

/*
 * Drop our reference to a buffer; if that was the last reference, wake up
 * anyone waiting for a free buffer.
 */
static void cache_put_and_wake(struct dm_bufio_client *c, struct dm_buffer *b)
{
	/*
	 * Relying on waitqueue_active() is racy, but we sleep
	 * with schedule_timeout anyway.
	 */
	if (cache_put(&c->cache, b) &&
	    unlikely(waitqueue_active(&c->free_buffer_wait)))
		wake_up(&c->free_buffer_wait);
}
/*
 * This assumes you have already checked the cache to see if the buffer
 * is already present (it will recheck after dropping the lock for allocation).
 */
static struct dm_buffer *__bufio_new(struct dm_bufio_client *c, sector_t block,
				     enum new_flag nf, int *need_submit,
				     struct list_head *write_list)
{
	struct dm_buffer *b, *new_b = NULL;

	*need_submit = 0;

	/* This can't be called with NF_GET */
	if (WARN_ON_ONCE(nf == NF_GET))
		return NULL;

	new_b = __alloc_buffer_wait(c, nf);
	if (!new_b)
		return NULL;

	/*
	 * We've had a period where the mutex was unlocked, so need to
	 * recheck the buffer tree.
	 */
	b = cache_get(&c->cache, block);
	if (b) {
		__free_buffer_wake(new_b);
		goto found_buffer;
	}

	__check_watermark(c, write_list);

	b = new_b;
	atomic_set(&b->hold_count, 1);
	WRITE_ONCE(b->last_accessed, jiffies);
	b->block = block;
	b->read_error = 0;
	b->write_error = 0;
	b->list_mode = LIST_CLEAN;

	if (nf == NF_FRESH)
		b->state = 0;
	else {
		b->state = 1 << B_READING;
		*need_submit = 1;
	}

	/*
	 * We mustn't insert into the cache until the B_READING state
	 * is set.  Otherwise another thread could get it and use
	 * it before it had been read.
	 */
	cache_insert(&c->cache, b);

	return b;

found_buffer:
	if (nf == NF_PREFETCH) {
		cache_put_and_wake(c, b);
		return NULL;
	}

	/*
	 * Note: it is essential that we don't wait for the buffer to be
	 * read if dm_bufio_get function is used. Both dm_bufio_get and
	 * dm_bufio_prefetch can be used in the driver request routine.
	 * If the user called both dm_bufio_prefetch and dm_bufio_get on
	 * the same buffer, it would deadlock if we waited.
	 */
	if (nf == NF_GET && unlikely(test_bit_acquire(B_READING, &b->state))) {
		cache_put_and_wake(c, b);
		return NULL;
	}

	return b;
}

/*
 * The endio routine for reading: set the error, clear the bit and wake up
 * anyone waiting on the buffer.
 */
static void read_endio(struct dm_buffer *b, blk_status_t status)
{
	b->read_error = status;

	BUG_ON(!test_bit(B_READING, &b->state));

	smp_mb__before_atomic();
	clear_bit(B_READING, &b->state);
	smp_mb__after_atomic();

	wake_up_bit(&b->state, B_READING);
}

/*
 * A common routine for dm_bufio_new and dm_bufio_read.  Operation of these
 * functions is similar except that dm_bufio_new doesn't read the
 * buffer from the disk (assuming that the caller overwrites all the data
 * and uses dm_bufio_mark_buffer_dirty to write new data back).
 */
static void *new_read(struct dm_bufio_client *c, sector_t block,
		      enum new_flag nf, struct dm_buffer **bp)
{
	int need_submit = 0;
	struct dm_buffer *b;

	LIST_HEAD(write_list);

	*bp = NULL;

	/*
	 * Fast path, hopefully the block is already in the cache.  No need
	 * to get the client lock for this.
	 */
	b = cache_get(&c->cache, block);
	if (b) {
		if (nf == NF_PREFETCH) {
			cache_put_and_wake(c, b);
			return NULL;
		}

		/*
		 * Note: it is essential that we don't wait for the buffer to be
		 * read if dm_bufio_get function is used. Both dm_bufio_get and
		 * dm_bufio_prefetch can be used in the driver request routine.
		 * If the user called both dm_bufio_prefetch and dm_bufio_get on
		 * the same buffer, it would deadlock if we waited.
		 */
		if (nf == NF_GET && unlikely(test_bit_acquire(B_READING, &b->state))) {
			cache_put_and_wake(c, b);
			return NULL;
		}
	}

	if (!b) {
		if (nf == NF_GET)
			return NULL;

		dm_bufio_lock(c);
		b = __bufio_new(c, block, nf, &need_submit, &write_list);
		dm_bufio_unlock(c);
	}

#ifdef CONFIG_DM_DEBUG_BLOCK_STACK_TRACING
	if (b && (atomic_read(&b->hold_count) == 1))
		buffer_record_stack(b);
#endif

	__flush_write_list(&write_list);

	if (!b)
		return NULL;

	if (need_submit)
		submit_io(b, REQ_OP_READ, read_endio);

	if (nf != NF_GET)	/* we already tested this condition above */
		wait_on_bit_io(&b->state, B_READING, TASK_UNINTERRUPTIBLE);

	if (b->read_error) {
		int error = blk_status_to_errno(b->read_error);

		dm_bufio_release(b);

		return ERR_PTR(error);
	}

	*bp = b;

	return b->data;
}

void *dm_bufio_get(struct dm_bufio_client *c, sector_t block,
		   struct dm_buffer **bp)
{
	return new_read(c, block, NF_GET, bp);
}
EXPORT_SYMBOL_GPL(dm_bufio_get);

void *dm_bufio_read(struct dm_bufio_client *c, sector_t block,
		    struct dm_buffer **bp)
{
	if (WARN_ON_ONCE(dm_bufio_in_request()))
		return ERR_PTR(-EINVAL);

	return new_read(c, block, NF_READ, bp);
}
EXPORT_SYMBOL_GPL(dm_bufio_read);

void *dm_bufio_new(struct dm_bufio_client *c, sector_t block,
		   struct dm_buffer **bp)
{
	if (WARN_ON_ONCE(dm_bufio_in_request()))
		return ERR_PTR(-EINVAL);

	return new_read(c, block, NF_FRESH, bp);
}
EXPORT_SYMBOL_GPL(dm_bufio_new);
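/*
 * Start reads for n_blocks consecutive blocks starting at 'block' without
 * waiting for them to complete.  Blocks that are already cached are skipped.
 */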
void dm_bufio_prefetch(struct dm_bufio_client *c,
		       sector_t block, unsigned int n_blocks)
{
	struct blk_plug plug;

	LIST_HEAD(write_list);

	if (WARN_ON_ONCE(dm_bufio_in_request()))
		return; /* should never happen */

	blk_start_plug(&plug);

	for (; n_blocks--; block++) {
		int need_submit;
		struct dm_buffer *b;

		b = cache_get(&c->cache, block);
		if (b) {
			/* already in cache */
			cache_put_and_wake(c, b);
			continue;
		}

		dm_bufio_lock(c);
		b = __bufio_new(c, block, NF_PREFETCH, &need_submit,
				&write_list);
		if (unlikely(!list_empty(&write_list))) {
			dm_bufio_unlock(c);
			blk_finish_plug(&plug);
			__flush_write_list(&write_list);
			blk_start_plug(&plug);
			dm_bufio_lock(c);
		}
		if (unlikely(b != NULL)) {
			dm_bufio_unlock(c);

			if (need_submit)
				submit_io(b, REQ_OP_READ, read_endio);
			dm_bufio_release(b);

			cond_resched();

			if (!n_blocks)
				goto flush_plug;
			dm_bufio_lock(c);
		}
		dm_bufio_unlock(c);
	}

flush_plug:
	blk_finish_plug(&plug);
}
EXPORT_SYMBOL_GPL(dm_bufio_prefetch);
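/*
 * Release a buffer obtained with dm_bufio_get(), dm_bufio_read(),
 * dm_bufio_new() or dm_bufio_prefetch().  If the buffer hit an I/O error
 * and is neither dirty nor under I/O, it is dropped from the cache
 * instead of being kept around.
 */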
void dm_bufio_release(struct dm_buffer *b)
{
	struct dm_bufio_client *c = b->c;

	/*
	 * If there were errors on the buffer, and the buffer is not
	 * to be written, free the buffer. There is no point in caching
	 * an invalid buffer.
	 */
	if ((b->read_error || b->write_error) &&
	    !test_bit_acquire(B_READING, &b->state) &&
	    !test_bit(B_WRITING, &b->state) &&
	    !test_bit(B_DIRTY, &b->state)) {
		dm_bufio_lock(c);

		/* cache remove can fail if there are other holders */
		if (cache_remove(&c->cache, b)) {
			__free_buffer_wake(b);
			dm_bufio_unlock(c);
			return;
		}

		dm_bufio_unlock(c);
	}

	cache_put_and_wake(c, b);
}
EXPORT_SYMBOL_GPL(dm_bufio_release);

void dm_bufio_mark_partial_buffer_dirty(struct dm_buffer *b,
					unsigned int start, unsigned int end)
{
	struct dm_bufio_client *c = b->c;

	BUG_ON(start >= end);
	BUG_ON(end > b->c->block_size);

	dm_bufio_lock(c);

	BUG_ON(test_bit(B_READING, &b->state));

	if (!test_and_set_bit(B_DIRTY, &b->state)) {
		b->dirty_start = start;
		b->dirty_end = end;
		cache_mark(&c->cache, b, LIST_DIRTY);
	} else {
		if (start < b->dirty_start)
			b->dirty_start = start;
		if (end > b->dirty_end)
			b->dirty_end = end;
	}

	dm_bufio_unlock(c);
}
EXPORT_SYMBOL_GPL(dm_bufio_mark_partial_buffer_dirty);

void dm_bufio_mark_buffer_dirty(struct dm_buffer *b)
{
	dm_bufio_mark_partial_buffer_dirty(b, 0, b->c->block_size);
}
EXPORT_SYMBOL_GPL(dm_bufio_mark_buffer_dirty);

void dm_bufio_write_dirty_buffers_async(struct dm_bufio_client *c)
{
	LIST_HEAD(write_list);

	if (WARN_ON_ONCE(dm_bufio_in_request()))
		return; /* should never happen */

	dm_bufio_lock(c);
	__write_dirty_buffers_async(c, 0, &write_list);
	dm_bufio_unlock(c);
	__flush_write_list(&write_list);
}
EXPORT_SYMBOL_GPL(dm_bufio_write_dirty_buffers_async);

/*
 * For performance, it is essential that the buffers are written asynchronously
 * and simultaneously (so that the block layer can merge the writes) and then
 * waited upon.
 *
 * Finally, we flush hardware disk cache.
 */
static bool is_writing(struct lru_entry *e, void *context)
{
	struct dm_buffer *b = le_to_buffer(e);

	return test_bit(B_WRITING, &b->state);
}

int dm_bufio_write_dirty_buffers(struct dm_bufio_client *c)
{
	int a, f;
	unsigned long nr_buffers;
	struct lru_entry *e;
	struct lru_iter it;

	LIST_HEAD(write_list);

	dm_bufio_lock(c);
	__write_dirty_buffers_async(c, 0, &write_list);
	dm_bufio_unlock(c);
	__flush_write_list(&write_list);
	dm_bufio_lock(c);

	nr_buffers = cache_count(&c->cache, LIST_DIRTY);
	lru_iter_begin(&c->cache.lru[LIST_DIRTY], &it);
	while ((e = lru_iter_next(&it, is_writing, c))) {
		struct dm_buffer *b = le_to_buffer(e);
		__cache_inc_buffer(b);

		BUG_ON(test_bit(B_READING, &b->state));

		if (nr_buffers) {
			nr_buffers--;
			dm_bufio_unlock(c);
			wait_on_bit_io(&b->state, B_WRITING, TASK_UNINTERRUPTIBLE);
			dm_bufio_lock(c);
		} else {
			wait_on_bit_io(&b->state, B_WRITING, TASK_UNINTERRUPTIBLE);
		}

		if (!test_bit(B_DIRTY, &b->state) && !test_bit(B_WRITING, &b->state))
			cache_mark(&c->cache, b, LIST_CLEAN);

		cache_put_and_wake(c, b);

		cond_resched();
	}
	lru_iter_end(&it);

	wake_up(&c->free_buffer_wait);
	dm_bufio_unlock(c);

	a = xchg(&c->async_write_error, 0);
	f = dm_bufio_issue_flush(c);
	if (a)
		return a;

	return f;
}
EXPORT_SYMBOL_GPL(dm_bufio_write_dirty_buffers);
/*
 * Use dm-io to send an empty barrier to flush the device.
 */
int dm_bufio_issue_flush(struct dm_bufio_client *c)
{
	struct dm_io_request io_req = {
		.bi_opf = REQ_OP_WRITE | REQ_PREFLUSH | REQ_SYNC,
		.mem.type = DM_IO_KMEM,
		.mem.ptr.addr = NULL,
		.client = c->dm_io,
	};
	struct dm_io_region io_reg = {
		.bdev = c->bdev,
		.sector = 0,
		.count = 0,
	};

	if (WARN_ON_ONCE(dm_bufio_in_request()))
		return -EINVAL;

	return dm_io(&io_req, 1, &io_reg, NULL, IOPRIO_DEFAULT);
}
EXPORT_SYMBOL_GPL(dm_bufio_issue_flush);

/*
 * Use dm-io to send a discard request to the device.
 */
int dm_bufio_issue_discard(struct dm_bufio_client *c, sector_t block, sector_t count)
{
	struct dm_io_request io_req = {
		.bi_opf = REQ_OP_DISCARD | REQ_SYNC,
		.mem.type = DM_IO_KMEM,
		.mem.ptr.addr = NULL,
		.client = c->dm_io,
	};
	struct dm_io_region io_reg = {
		.bdev = c->bdev,
		.sector = block_to_sector(c, block),
		.count = block_to_sector(c, count),
	};

	if (WARN_ON_ONCE(dm_bufio_in_request()))
		return -EINVAL; /* discards are optional */

	return dm_io(&io_req, 1, &io_reg, NULL, IOPRIO_DEFAULT);
}
EXPORT_SYMBOL_GPL(dm_bufio_issue_discard);

static bool forget_buffer(struct dm_bufio_client *c, sector_t block)
{
	struct dm_buffer *b;

	b = cache_get(&c->cache, block);
	if (b) {
		if (likely(!smp_load_acquire(&b->state))) {
			if (cache_remove(&c->cache, b))
				__free_buffer_wake(b);
			else
				cache_put_and_wake(c, b);
		} else {
			cache_put_and_wake(c, b);
		}
	}

	return b ? true : false;
}

/*
 * Free the given buffer.
 *
 * This is just a hint; if the buffer is in use or dirty, this function
 * does nothing.
 */
void dm_bufio_forget(struct dm_bufio_client *c, sector_t block)
{
	dm_bufio_lock(c);
	forget_buffer(c, block);
	dm_bufio_unlock(c);
}
EXPORT_SYMBOL_GPL(dm_bufio_forget);

static enum evict_result idle(struct dm_buffer *b, void *context)
{
	return b->state ? ER_DONT_EVICT : ER_EVICT;
}

void dm_bufio_forget_buffers(struct dm_bufio_client *c, sector_t block, sector_t n_blocks)
{
	dm_bufio_lock(c);
	cache_remove_range(&c->cache, block, block + n_blocks, idle, __free_buffer_wake);
	dm_bufio_unlock(c);
}
EXPORT_SYMBOL_GPL(dm_bufio_forget_buffers);

void dm_bufio_set_minimum_buffers(struct dm_bufio_client *c, unsigned int n)
{
	c->minimum_buffers = n;
}
EXPORT_SYMBOL_GPL(dm_bufio_set_minimum_buffers);

unsigned int dm_bufio_get_block_size(struct dm_bufio_client *c)
{
	return c->block_size;
}
EXPORT_SYMBOL_GPL(dm_bufio_get_block_size);

sector_t dm_bufio_get_device_size(struct dm_bufio_client *c)
{
	sector_t s = bdev_nr_sectors(c->bdev);

	if (s >= c->start)
		s -= c->start;
	else
		s = 0;
	if (likely(c->sectors_per_block_bits >= 0))
		s >>= c->sectors_per_block_bits;
	else
		sector_div(s, c->block_size >> SECTOR_SHIFT);
	return s;
}
EXPORT_SYMBOL_GPL(dm_bufio_get_device_size);

struct dm_io_client *dm_bufio_get_dm_io_client(struct dm_bufio_client *c)
{
	return c->dm_io;
}
EXPORT_SYMBOL_GPL(dm_bufio_get_dm_io_client);

sector_t dm_bufio_get_block_number(struct dm_buffer *b)
{
	return b->block;
}
EXPORT_SYMBOL_GPL(dm_bufio_get_block_number);

void *dm_bufio_get_block_data(struct dm_buffer *b)
{
	return b->data;
}
EXPORT_SYMBOL_GPL(dm_bufio_get_block_data);
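/*
 * The per-buffer auxiliary area requested via aux_size in
 * dm_bufio_client_create() lives directly after struct dm_buffer, which is
 * why the accessor below simply returns b + 1.
 */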
void *dm_bufio_get_aux_data(struct dm_buffer *b)
{
	return b + 1;
}
EXPORT_SYMBOL_GPL(dm_bufio_get_aux_data);

struct dm_bufio_client *dm_bufio_get_client(struct dm_buffer *b)
{
	return b->c;
}
EXPORT_SYMBOL_GPL(dm_bufio_get_client);

static enum it_action warn_leak(struct dm_buffer *b, void *context)
{
	bool *warned = context;

	WARN_ON(!(*warned));
	*warned = true;
	DMERR("leaked buffer %llx, hold count %u, list %d",
	      (unsigned long long)b->block, atomic_read(&b->hold_count), b->list_mode);
#ifdef CONFIG_DM_DEBUG_BLOCK_STACK_TRACING
	stack_trace_print(b->stack_entries, b->stack_len, 1);
	/* mark unclaimed to avoid WARN_ON at end of drop_buffers() */
	atomic_set(&b->hold_count, 0);
#endif
	return IT_NEXT;
}
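/*
 * Called when a client is being destroyed: write out what is dirty, free
 * every unclaimed buffer and complain (via warn_leak) about any buffer
 * that is still held.
 */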

static void drop_buffers(struct dm_bufio_client *c)
{
	int i;
	struct dm_buffer *b;

	if (WARN_ON(dm_bufio_in_request()))
		return; /* should never happen */

	/*
	 * An optimization so that the buffers are not written one-by-one.
	 */
	dm_bufio_write_dirty_buffers_async(c);

	dm_bufio_lock(c);

	while ((b = __get_unclaimed_buffer(c)))
		__free_buffer_wake(b);

	for (i = 0; i < LIST_SIZE; i++) {
		bool warned = false;

		cache_iterate(&c->cache, i, warn_leak, &warned);
	}

#ifdef CONFIG_DM_DEBUG_BLOCK_STACK_TRACING
	while ((b = __get_unclaimed_buffer(c)))
		__free_buffer_wake(b);
#endif

	for (i = 0; i < LIST_SIZE; i++)
		WARN_ON(cache_count(&c->cache, i));

	dm_bufio_unlock(c);
}
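
/*
 * get_retain_buffers() below converts the retain_bytes module parameter
 * into a number of buffers.  Illustrative arithmetic: with a power-of-two
 * block size of 4096 bytes, sectors_per_block_bits is 3, so retain_bytes
 * is shifted right by 3 + SECTOR_SHIFT (9) = 12 bits, i.e. divided by
 * 4096.  Other block sizes fall back to a plain division.
 */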
static unsigned long get_retain_buffers(struct dm_bufio_client *c)
{
	unsigned long retain_bytes = READ_ONCE(dm_bufio_retain_bytes);

	if (likely(c->sectors_per_block_bits >= 0))
		retain_bytes >>= c->sectors_per_block_bits + SECTOR_SHIFT;
	else
		retain_bytes /= c->block_size;

	return retain_bytes;
}

static void __scan(struct dm_bufio_client *c)
{
	int l;
	struct dm_buffer *b;
	unsigned long freed = 0;
	unsigned long retain_target = get_retain_buffers(c);
	unsigned long count = cache_total(&c->cache);

	for (l = 0; l < LIST_SIZE; l++) {
		while (true) {
			if (count - freed <= retain_target)
				atomic_long_set(&c->need_shrink, 0);
			if (!atomic_long_read(&c->need_shrink))
				break;

			b = cache_evict(&c->cache, l,
					l == LIST_CLEAN ? is_clean : is_dirty, c);
			if (!b)
				break;

			__make_buffer_clean(b);
			__free_buffer_wake(b);

			atomic_long_dec(&c->need_shrink);
			freed++;
			cond_resched();
		}
	}
}

static void shrink_work(struct work_struct *w)
{
	struct dm_bufio_client *c = container_of(w, struct dm_bufio_client, shrink_work);

	dm_bufio_lock(c);
	__scan(c);
	dm_bufio_unlock(c);
}
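
/*
 * Shrinker interface.  dm_bufio_shrink_scan() frees nothing itself: it only
 * records how much the MM layer asked us to reclaim in c->need_shrink and
 * queues shrink_work(), which then runs __scan() with the client lock held.
 * dm_bufio_shrink_count() reports how many buffers are reclaimable, i.e.
 * the cached total minus the retain target and minus work that is already
 * queued.  Deferring to a workqueue means the shrinker callback itself
 * never has to take the client lock.
 */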
static unsigned long dm_bufio_shrink_scan(struct shrinker *shrink, struct shrink_control *sc)
{
	struct dm_bufio_client *c;

	c = container_of(shrink, struct dm_bufio_client, shrinker);
	atomic_long_add(sc->nr_to_scan, &c->need_shrink);
	queue_work(dm_bufio_wq, &c->shrink_work);

	return sc->nr_to_scan;
}

static unsigned long dm_bufio_shrink_count(struct shrinker *shrink, struct shrink_control *sc)
{
	struct dm_bufio_client *c = container_of(shrink, struct dm_bufio_client, shrinker);
	unsigned long count = cache_total(&c->cache);
	unsigned long retain_target = get_retain_buffers(c);
	unsigned long queued_for_cleanup = atomic_long_read(&c->need_shrink);

	if (unlikely(count < retain_target))
		count = 0;
	else
		count -= retain_target;

	if (unlikely(count < queued_for_cleanup))
		count = 0;
	else
		count -= queued_for_cleanup;

	return count;
}

/*
 * Create the buffering interface
 */
struct dm_bufio_client *dm_bufio_client_create(struct block_device *bdev, unsigned int block_size,
					       unsigned int reserved_buffers, unsigned int aux_size,
					       void (*alloc_callback)(struct dm_buffer *),
					       void (*write_callback)(struct dm_buffer *),
					       unsigned int flags)
{
	int r;
	unsigned int num_locks;
	struct dm_bufio_client *c;
	char slab_name[27];

	if (!block_size || block_size & ((1 << SECTOR_SHIFT) - 1)) {
		DMERR("%s: block size not specified or is not multiple of 512b", __func__);
		r = -EINVAL;
		goto bad_client;
	}

	num_locks = dm_num_hash_locks();
	c = kzalloc(sizeof(*c) + (num_locks * sizeof(struct buffer_tree)), GFP_KERNEL);
	if (!c) {
		r = -ENOMEM;
		goto bad_client;
	}
	cache_init(&c->cache, num_locks, (flags & DM_BUFIO_CLIENT_NO_SLEEP) != 0);

	c->bdev = bdev;
	c->block_size = block_size;
	if (is_power_of_2(block_size))
		c->sectors_per_block_bits = __ffs(block_size) - SECTOR_SHIFT;
	else
		c->sectors_per_block_bits = -1;

	c->alloc_callback = alloc_callback;
	c->write_callback = write_callback;

	if (flags & DM_BUFIO_CLIENT_NO_SLEEP) {
		c->no_sleep = true;
		static_branch_inc(&no_sleep_enabled);
	}

	mutex_init(&c->lock);
	spin_lock_init(&c->spinlock);
	INIT_LIST_HEAD(&c->reserved_buffers);
	c->need_reserved_buffers = reserved_buffers;

	dm_bufio_set_minimum_buffers(c, DM_BUFIO_MIN_BUFFERS);

	init_waitqueue_head(&c->free_buffer_wait);
	c->async_write_error = 0;

	c->dm_io = dm_io_client_create();
	if (IS_ERR(c->dm_io)) {
		r = PTR_ERR(c->dm_io);
		goto bad_dm_io;
	}

	if (block_size <= KMALLOC_MAX_SIZE &&
	    (block_size < PAGE_SIZE || !is_power_of_2(block_size))) {
		unsigned int align = min(1U << __ffs(block_size), (unsigned int)PAGE_SIZE);

		snprintf(slab_name, sizeof(slab_name), "dm_bufio_cache-%u", block_size);
		c->slab_cache = kmem_cache_create(slab_name, block_size, align,
						  SLAB_RECLAIM_ACCOUNT, NULL);
		if (!c->slab_cache) {
			r = -ENOMEM;
			goto bad;
		}
	}
	if (aux_size)
		snprintf(slab_name, sizeof(slab_name), "dm_bufio_buffer-%u", aux_size);
	else
		snprintf(slab_name, sizeof(slab_name), "dm_bufio_buffer");
	c->slab_buffer = kmem_cache_create(slab_name, sizeof(struct dm_buffer) + aux_size,
					   0, SLAB_RECLAIM_ACCOUNT, NULL);
	if (!c->slab_buffer) {
		r = -ENOMEM;
		goto bad;
	}

	while (c->need_reserved_buffers) {
		struct dm_buffer *b = alloc_buffer(c, GFP_KERNEL);

		if (!b) {
			r = -ENOMEM;
			goto bad;
		}
		__free_buffer_wake(b);
	}

	INIT_WORK(&c->shrink_work, shrink_work);
	atomic_long_set(&c->need_shrink, 0);

	c->shrinker.count_objects = dm_bufio_shrink_count;
	c->shrinker.scan_objects = dm_bufio_shrink_scan;
	c->shrinker.seeks = 1;
	c->shrinker.batch = 0;
	r = register_shrinker(&c->shrinker, "dm-bufio:(%u:%u)",
			      MAJOR(bdev->bd_dev), MINOR(bdev->bd_dev));
	if (r)
		goto bad;

	mutex_lock(&dm_bufio_clients_lock);
	dm_bufio_client_count++;
	list_add(&c->client_list, &dm_bufio_all_clients);
	__cache_size_refresh();
	mutex_unlock(&dm_bufio_clients_lock);

	return c;

bad:
	while (!list_empty(&c->reserved_buffers)) {
		struct dm_buffer *b = list_to_buffer(c->reserved_buffers.next);

		list_del(&b->lru.list);
		free_buffer(b);
	}
	kmem_cache_destroy(c->slab_cache);
	kmem_cache_destroy(c->slab_buffer);
	dm_io_client_destroy(c->dm_io);
bad_dm_io:
	mutex_destroy(&c->lock);
	if (c->no_sleep)
		static_branch_dec(&no_sleep_enabled);
	kfree(c);
bad_client:
	return ERR_PTR(r);
}
EXPORT_SYMBOL_GPL(dm_bufio_client_create);
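
/*
 * Illustrative use only; "my_bdev" and MY_BLOCK_SIZE are placeholders, not
 * names defined in this file.  A typical caller creates one client per
 * metadata device and destroys it on teardown:
 *
 *	struct dm_bufio_client *c;
 *
 *	c = dm_bufio_client_create(my_bdev, MY_BLOCK_SIZE, 1, 0,
 *				   NULL, NULL, 0);
 *	if (IS_ERR(c))
 *		return PTR_ERR(c);
 *	...
 *	dm_bufio_client_destroy(c);
 */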

/*
 * Free the buffering interface.
 * It is required that there are no references on any buffers.
 */
void dm_bufio_client_destroy(struct dm_bufio_client *c)
{
	unsigned int i;

	drop_buffers(c);

	unregister_shrinker(&c->shrinker);
	flush_work(&c->shrink_work);

	mutex_lock(&dm_bufio_clients_lock);

	list_del(&c->client_list);
	dm_bufio_client_count--;
	__cache_size_refresh();

	mutex_unlock(&dm_bufio_clients_lock);

	WARN_ON(c->need_reserved_buffers);

	while (!list_empty(&c->reserved_buffers)) {
		struct dm_buffer *b = list_to_buffer(c->reserved_buffers.next);

		list_del(&b->lru.list);
		free_buffer(b);
	}

	for (i = 0; i < LIST_SIZE; i++)
		if (cache_count(&c->cache, i))
			DMERR("leaked buffer count %d: %lu", i, cache_count(&c->cache, i));

	for (i = 0; i < LIST_SIZE; i++)
		WARN_ON(cache_count(&c->cache, i));

	cache_destroy(&c->cache);
	kmem_cache_destroy(c->slab_cache);
	kmem_cache_destroy(c->slab_buffer);
	dm_io_client_destroy(c->dm_io);
	mutex_destroy(&c->lock);
	if (c->no_sleep)
		static_branch_dec(&no_sleep_enabled);
	kfree(c);
}
EXPORT_SYMBOL_GPL(dm_bufio_client_destroy);
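
/*
 * dm_bufio_client_reset() below drops every cached buffer and waits for
 * any queued shrinker work to finish.  Unlike dm_bufio_client_destroy(),
 * the client itself stays registered and can continue to be used
 * afterwards.
 */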
void dm_bufio_client_reset(struct dm_bufio_client *c)
{
	drop_buffers(c);
	flush_work(&c->shrink_work);
}
EXPORT_SYMBOL_GPL(dm_bufio_client_reset);

void dm_bufio_set_sector_offset(struct dm_bufio_client *c, sector_t start)
{
	c->start = start;
}
EXPORT_SYMBOL_GPL(dm_bufio_set_sector_offset);

/*--------------------------------------------------------------*/

static unsigned int get_max_age_hz(void)
{
	unsigned int max_age = READ_ONCE(dm_bufio_max_age);

	if (max_age > UINT_MAX / HZ)
		max_age = UINT_MAX / HZ;

	return max_age * HZ;
}

static bool older_than(struct dm_buffer *b, unsigned long age_hz)
{
	return time_after_eq(jiffies, READ_ONCE(b->last_accessed) + age_hz);
}

struct evict_params {
	gfp_t gfp;
	unsigned long age_hz;

	/*
	 * This gets updated with the largest last_accessed (i.e. most
	 * recently used) of the evicted buffers. It will not be reinitialised
	 * by __evict_many(), so you can use it across multiple invocations.
	 */
	unsigned long last_accessed;
};

/*
 * We may not be able to evict this buffer if IO is pending or the client
 * is still using it.
 *
 * And if GFP_NOFS is used, we must not do any I/O because we hold
 * dm_bufio_clients_lock and we would risk deadlock if the I/O gets
 * rerouted to a different bufio client.
 */
static enum evict_result select_for_evict(struct dm_buffer *b, void *context)
{
	struct evict_params *params = context;

	if (!(params->gfp & __GFP_FS) ||
	    (static_branch_unlikely(&no_sleep_enabled) && b->c->no_sleep)) {
		if (test_bit_acquire(B_READING, &b->state) ||
		    test_bit(B_WRITING, &b->state) ||
		    test_bit(B_DIRTY, &b->state))
			return ER_DONT_EVICT;
	}

	return older_than(b, params->age_hz) ? ER_EVICT : ER_STOP;
}

static unsigned long __evict_many(struct dm_bufio_client *c,
				  struct evict_params *params,
				  int list_mode, unsigned long max_count)
{
	unsigned long count;
	unsigned long last_accessed;
	struct dm_buffer *b;

	for (count = 0; count < max_count; count++) {
		b = cache_evict(&c->cache, list_mode, select_for_evict, params);
		if (!b)
			break;

		last_accessed = READ_ONCE(b->last_accessed);
		if (time_after_eq(params->last_accessed, last_accessed))
			params->last_accessed = last_accessed;

		__make_buffer_clean(b);
		__free_buffer_wake(b);

		cond_resched();
	}

	return count;
}
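
/*
 * Periodic aging (driven by work_fn() below): evict_old_buffers() flushes
 * any writes queued by __check_watermark(), then evicts clean buffers
 * older than the configured maximum age while keeping at least
 * get_retain_buffers() buffers per client.  The evict_params gfp of 0
 * makes select_for_evict() skip any buffer that is still reading, writing
 * or dirty.
 */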
static void evict_old_buffers(struct dm_bufio_client *c, unsigned long age_hz)
{
	struct evict_params params = {.gfp = 0, .age_hz = age_hz, .last_accessed = 0};
	unsigned long retain = get_retain_buffers(c);
	unsigned long count;
	LIST_HEAD(write_list);

	dm_bufio_lock(c);

	__check_watermark(c, &write_list);
	if (unlikely(!list_empty(&write_list))) {
		dm_bufio_unlock(c);
		__flush_write_list(&write_list);
		dm_bufio_lock(c);
	}

	count = cache_total(&c->cache);
	if (count > retain)
		__evict_many(c, &params, LIST_CLEAN, count - retain);

	dm_bufio_unlock(c);
}

static void cleanup_old_buffers(void)
{
	unsigned long max_age_hz = get_max_age_hz();
	struct dm_bufio_client *c;

	mutex_lock(&dm_bufio_clients_lock);

	__cache_size_refresh();

	list_for_each_entry(c, &dm_bufio_all_clients, client_list)
		evict_old_buffers(c, max_age_hz);

	mutex_unlock(&dm_bufio_clients_lock);
}

static void work_fn(struct work_struct *w)
{
	cleanup_old_buffers();

	queue_delayed_work(dm_bufio_wq, &dm_bufio_cleanup_old_work,
			   DM_BUFIO_WORK_TIMER_SECS * HZ);
}

/*--------------------------------------------------------------*/

/*
 * Global cleanup tries to evict the oldest buffers from across _all_
 * the clients. It does this by repeatedly evicting a few buffers from
 * the client that holds the oldest buffer. It's approximate, but hopefully
 * good enough.
 */
static struct dm_bufio_client *__pop_client(void)
{
	struct list_head *h;

	if (list_empty(&dm_bufio_all_clients))
		return NULL;

	h = dm_bufio_all_clients.next;
	list_del(h);
	return container_of(h, struct dm_bufio_client, client_list);
}

/*
 * Inserts the client in the global client list based on its
 * 'oldest_buffer' field.
 */
static void __insert_client(struct dm_bufio_client *new_client)
{
	struct dm_bufio_client *c;
	struct list_head *h = dm_bufio_all_clients.next;

	while (h != &dm_bufio_all_clients) {
		c = container_of(h, struct dm_bufio_client, client_list);
		if (time_after_eq(c->oldest_buffer, new_client->oldest_buffer))
			break;
		h = h->next;
	}

	list_add_tail(&new_client->client_list, h);
}
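
/*
 * One round of global eviction: __evict_a_few() takes the client believed
 * to hold the oldest buffer, evicts up to nr_buffers clean buffers from
 * it, records the age of what it evicted and re-inserts the client in age
 * order.  evict_old() further below repeats this in batches of 64 until
 * dm_bufio_current_allocated drops under the low watermark.
 */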
static unsigned long __evict_a_few(unsigned long nr_buffers)
{
	unsigned long count;
	struct dm_bufio_client *c;
	struct evict_params params = {
		.gfp = GFP_KERNEL,
		.age_hz = 0,
		/* set to jiffies in case there are no buffers in this client */
		.last_accessed = jiffies
	};

	c = __pop_client();
	if (!c)
		return 0;

	dm_bufio_lock(c);
	count = __evict_many(c, &params, LIST_CLEAN, nr_buffers);
	dm_bufio_unlock(c);

	if (count)
		c->oldest_buffer = params.last_accessed;
	__insert_client(c);

	return count;
}

static void check_watermarks(void)
{
	LIST_HEAD(write_list);
	struct dm_bufio_client *c;

	mutex_lock(&dm_bufio_clients_lock);
	list_for_each_entry(c, &dm_bufio_all_clients, client_list) {
		dm_bufio_lock(c);
		__check_watermark(c, &write_list);
		dm_bufio_unlock(c);
	}
	mutex_unlock(&dm_bufio_clients_lock);

	__flush_write_list(&write_list);
}

static void evict_old(void)
{
	unsigned long threshold = dm_bufio_cache_size -
		dm_bufio_cache_size / DM_BUFIO_LOW_WATERMARK_RATIO;

	mutex_lock(&dm_bufio_clients_lock);
	while (dm_bufio_current_allocated > threshold) {
		if (!__evict_a_few(64))
			break;
		cond_resched();
	}
	mutex_unlock(&dm_bufio_clients_lock);
}

static void do_global_cleanup(struct work_struct *w)
{
	check_watermarks();
	evict_old();
}

/*
 *--------------------------------------------------------------
 * Module setup
 *--------------------------------------------------------------
 */

/*
 * This is called only once for the whole dm_bufio module.
 * It initializes the memory limit.
 */
static int __init dm_bufio_init(void)
{
	__u64 mem;

	dm_bufio_allocated_kmem_cache = 0;
	dm_bufio_allocated_get_free_pages = 0;
	dm_bufio_allocated_vmalloc = 0;
	dm_bufio_current_allocated = 0;

	mem = (__u64)mult_frac(totalram_pages() - totalhigh_pages(),
			       DM_BUFIO_MEMORY_PERCENT, 100) << PAGE_SHIFT;

	if (mem > ULONG_MAX)
		mem = ULONG_MAX;

#ifdef CONFIG_MMU
	if (mem > mult_frac(VMALLOC_TOTAL, DM_BUFIO_VMALLOC_PERCENT, 100))
		mem = mult_frac(VMALLOC_TOTAL, DM_BUFIO_VMALLOC_PERCENT, 100);
#endif

	dm_bufio_default_cache_size = mem;

	mutex_lock(&dm_bufio_clients_lock);
	__cache_size_refresh();
	mutex_unlock(&dm_bufio_clients_lock);

	dm_bufio_wq = alloc_workqueue("dm_bufio_cache", WQ_MEM_RECLAIM, 0);
	if (!dm_bufio_wq)
		return -ENOMEM;

	INIT_DELAYED_WORK(&dm_bufio_cleanup_old_work, work_fn);
	INIT_WORK(&dm_bufio_replacement_work, do_global_cleanup);
	queue_delayed_work(dm_bufio_wq, &dm_bufio_cleanup_old_work,
			   DM_BUFIO_WORK_TIMER_SECS * HZ);

	return 0;
}
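
/*
 * Two pieces of deferred work are set up in dm_bufio_init() above: the
 * delayed dm_bufio_cleanup_old_work re-arms itself from work_fn() every
 * DM_BUFIO_WORK_TIMER_SECS seconds to age out old buffers, while
 * dm_bufio_replacement_work, whenever it is queued, runs
 * do_global_cleanup() to push the cache back below its global size limit.
 */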

/*
 * This is called once when unloading the dm_bufio module.
 */
static void __exit dm_bufio_exit(void)
{
	int bug = 0;

	cancel_delayed_work_sync(&dm_bufio_cleanup_old_work);
	destroy_workqueue(dm_bufio_wq);

	if (dm_bufio_client_count) {
		DMCRIT("%s: dm_bufio_client_count leaked: %d",
		       __func__, dm_bufio_client_count);
		bug = 1;
	}

	if (dm_bufio_current_allocated) {
		DMCRIT("%s: dm_bufio_current_allocated leaked: %lu",
		       __func__, dm_bufio_current_allocated);
		bug = 1;
	}

	if (dm_bufio_allocated_get_free_pages) {
		DMCRIT("%s: dm_bufio_allocated_get_free_pages leaked: %lu",
		       __func__, dm_bufio_allocated_get_free_pages);
		bug = 1;
	}

	if (dm_bufio_allocated_vmalloc) {
		DMCRIT("%s: dm_bufio_vmalloc leaked: %lu",
		       __func__, dm_bufio_allocated_vmalloc);
		bug = 1;
	}

	WARN_ON(bug); /* leaks are not worth crashing the system */
}

module_init(dm_bufio_init)
module_exit(dm_bufio_exit)
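
/*
 * The module parameters below are also visible at run time under
 * /sys/module/dm_bufio/parameters/ (assuming the usual module_param sysfs
 * layout); the 0644 parameters are writable there, the 0444 statistics are
 * read-only.
 */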

module_param_named(max_cache_size_bytes, dm_bufio_cache_size, ulong, 0644);
MODULE_PARM_DESC(max_cache_size_bytes, "Size of metadata cache");

module_param_named(max_age_seconds, dm_bufio_max_age, uint, 0644);
MODULE_PARM_DESC(max_age_seconds, "Max age of a buffer in seconds");

module_param_named(retain_bytes, dm_bufio_retain_bytes, ulong, 0644);
MODULE_PARM_DESC(retain_bytes, "Try to keep at least this many bytes cached in memory");

module_param_named(peak_allocated_bytes, dm_bufio_peak_allocated, ulong, 0644);
MODULE_PARM_DESC(peak_allocated_bytes, "Tracks the maximum allocated memory");

module_param_named(allocated_kmem_cache_bytes, dm_bufio_allocated_kmem_cache, ulong, 0444);
MODULE_PARM_DESC(allocated_kmem_cache_bytes, "Memory allocated with kmem_cache_alloc");

module_param_named(allocated_get_free_pages_bytes, dm_bufio_allocated_get_free_pages, ulong, 0444);
MODULE_PARM_DESC(allocated_get_free_pages_bytes, "Memory allocated with get_free_pages");

module_param_named(allocated_vmalloc_bytes, dm_bufio_allocated_vmalloc, ulong, 0444);
MODULE_PARM_DESC(allocated_vmalloc_bytes, "Memory allocated with vmalloc");

module_param_named(current_allocated_bytes, dm_bufio_current_allocated, ulong, 0444);
MODULE_PARM_DESC(current_allocated_bytes, "Memory currently used by the cache");

MODULE_AUTHOR("Mikulas Patocka <dm-devel@redhat.com>");
MODULE_DESCRIPTION(DM_NAME " buffered I/O library");
MODULE_LICENSE("GPL");