18c2ecf20Sopenharmony_ci// SPDX-License-Identifier: GPL-2.0-only 28c2ecf20Sopenharmony_ci/* 38c2ecf20Sopenharmony_ci * Ram backed block device driver. 48c2ecf20Sopenharmony_ci * 58c2ecf20Sopenharmony_ci * Copyright (C) 2007 Nick Piggin 68c2ecf20Sopenharmony_ci * Copyright (C) 2007 Novell Inc. 78c2ecf20Sopenharmony_ci * 88c2ecf20Sopenharmony_ci * Parts derived from drivers/block/rd.c, and drivers/block/loop.c, copyright 98c2ecf20Sopenharmony_ci * of their respective owners. 108c2ecf20Sopenharmony_ci */ 118c2ecf20Sopenharmony_ci 128c2ecf20Sopenharmony_ci#include <linux/init.h> 138c2ecf20Sopenharmony_ci#include <linux/initrd.h> 148c2ecf20Sopenharmony_ci#include <linux/module.h> 158c2ecf20Sopenharmony_ci#include <linux/moduleparam.h> 168c2ecf20Sopenharmony_ci#include <linux/major.h> 178c2ecf20Sopenharmony_ci#include <linux/blkdev.h> 188c2ecf20Sopenharmony_ci#include <linux/bio.h> 198c2ecf20Sopenharmony_ci#include <linux/highmem.h> 208c2ecf20Sopenharmony_ci#include <linux/mutex.h> 218c2ecf20Sopenharmony_ci#include <linux/radix-tree.h> 228c2ecf20Sopenharmony_ci#include <linux/fs.h> 238c2ecf20Sopenharmony_ci#include <linux/slab.h> 248c2ecf20Sopenharmony_ci#include <linux/backing-dev.h> 258c2ecf20Sopenharmony_ci 268c2ecf20Sopenharmony_ci#include <linux/uaccess.h> 278c2ecf20Sopenharmony_ci 288c2ecf20Sopenharmony_ci#define PAGE_SECTORS_SHIFT (PAGE_SHIFT - SECTOR_SHIFT) 298c2ecf20Sopenharmony_ci#define PAGE_SECTORS (1 << PAGE_SECTORS_SHIFT) 308c2ecf20Sopenharmony_ci 318c2ecf20Sopenharmony_ci/* 328c2ecf20Sopenharmony_ci * Each block ramdisk device has a radix_tree brd_pages of pages that stores 338c2ecf20Sopenharmony_ci * the pages containing the block device's contents. A brd page's ->index is 348c2ecf20Sopenharmony_ci * its offset in PAGE_SIZE units. This is similar to, but in no way connected 358c2ecf20Sopenharmony_ci * with, the kernel's pagecache or buffer cache (which sit above our block 368c2ecf20Sopenharmony_ci * device). 378c2ecf20Sopenharmony_ci */ 388c2ecf20Sopenharmony_cistruct brd_device { 398c2ecf20Sopenharmony_ci int brd_number; 408c2ecf20Sopenharmony_ci 418c2ecf20Sopenharmony_ci struct request_queue *brd_queue; 428c2ecf20Sopenharmony_ci struct gendisk *brd_disk; 438c2ecf20Sopenharmony_ci struct list_head brd_list; 448c2ecf20Sopenharmony_ci 458c2ecf20Sopenharmony_ci /* 468c2ecf20Sopenharmony_ci * Backing store of pages and lock to protect it. This is the contents 478c2ecf20Sopenharmony_ci * of the block device. 488c2ecf20Sopenharmony_ci */ 498c2ecf20Sopenharmony_ci spinlock_t brd_lock; 508c2ecf20Sopenharmony_ci struct radix_tree_root brd_pages; 518c2ecf20Sopenharmony_ci}; 528c2ecf20Sopenharmony_ci 538c2ecf20Sopenharmony_ci/* 548c2ecf20Sopenharmony_ci * Look up and return a brd's page for a given sector. 558c2ecf20Sopenharmony_ci */ 568c2ecf20Sopenharmony_cistatic struct page *brd_lookup_page(struct brd_device *brd, sector_t sector) 578c2ecf20Sopenharmony_ci{ 588c2ecf20Sopenharmony_ci pgoff_t idx; 598c2ecf20Sopenharmony_ci struct page *page; 608c2ecf20Sopenharmony_ci 618c2ecf20Sopenharmony_ci /* 628c2ecf20Sopenharmony_ci * The page lifetime is protected by the fact that we have opened the 638c2ecf20Sopenharmony_ci * device node -- brd pages will never be deleted under us, so we 648c2ecf20Sopenharmony_ci * don't need any further locking or refcounting. 658c2ecf20Sopenharmony_ci * 668c2ecf20Sopenharmony_ci * This is strictly true for the radix-tree nodes as well (ie. we 678c2ecf20Sopenharmony_ci * don't actually need the rcu_read_lock()), however that is not a 688c2ecf20Sopenharmony_ci * documented feature of the radix-tree API so it is better to be 698c2ecf20Sopenharmony_ci * safe here (we don't have total exclusion from radix tree updates 708c2ecf20Sopenharmony_ci * here, only deletes). 718c2ecf20Sopenharmony_ci */ 728c2ecf20Sopenharmony_ci rcu_read_lock(); 738c2ecf20Sopenharmony_ci idx = sector >> PAGE_SECTORS_SHIFT; /* sector to page index */ 748c2ecf20Sopenharmony_ci page = radix_tree_lookup(&brd->brd_pages, idx); 758c2ecf20Sopenharmony_ci rcu_read_unlock(); 768c2ecf20Sopenharmony_ci 778c2ecf20Sopenharmony_ci BUG_ON(page && page->index != idx); 788c2ecf20Sopenharmony_ci 798c2ecf20Sopenharmony_ci return page; 808c2ecf20Sopenharmony_ci} 818c2ecf20Sopenharmony_ci 828c2ecf20Sopenharmony_ci/* 838c2ecf20Sopenharmony_ci * Insert a new page for a given sector, if one does not already exist. 848c2ecf20Sopenharmony_ci */ 858c2ecf20Sopenharmony_cistatic int brd_insert_page(struct brd_device *brd, sector_t sector) 868c2ecf20Sopenharmony_ci{ 878c2ecf20Sopenharmony_ci pgoff_t idx; 888c2ecf20Sopenharmony_ci struct page *page; 898c2ecf20Sopenharmony_ci gfp_t gfp_flags; 908c2ecf20Sopenharmony_ci 918c2ecf20Sopenharmony_ci page = brd_lookup_page(brd, sector); 928c2ecf20Sopenharmony_ci if (page) 938c2ecf20Sopenharmony_ci return 0; 948c2ecf20Sopenharmony_ci 958c2ecf20Sopenharmony_ci /* 968c2ecf20Sopenharmony_ci * Must use NOIO because we don't want to recurse back into the 978c2ecf20Sopenharmony_ci * block or filesystem layers from page reclaim. 988c2ecf20Sopenharmony_ci */ 998c2ecf20Sopenharmony_ci gfp_flags = GFP_NOIO | __GFP_ZERO | __GFP_HIGHMEM; 1008c2ecf20Sopenharmony_ci page = alloc_page(gfp_flags); 1018c2ecf20Sopenharmony_ci if (!page) 1028c2ecf20Sopenharmony_ci return -ENOMEM; 1038c2ecf20Sopenharmony_ci 1048c2ecf20Sopenharmony_ci if (radix_tree_preload(GFP_NOIO)) { 1058c2ecf20Sopenharmony_ci __free_page(page); 1068c2ecf20Sopenharmony_ci return -ENOMEM; 1078c2ecf20Sopenharmony_ci } 1088c2ecf20Sopenharmony_ci 1098c2ecf20Sopenharmony_ci spin_lock(&brd->brd_lock); 1108c2ecf20Sopenharmony_ci idx = sector >> PAGE_SECTORS_SHIFT; 1118c2ecf20Sopenharmony_ci page->index = idx; 1128c2ecf20Sopenharmony_ci if (radix_tree_insert(&brd->brd_pages, idx, page)) { 1138c2ecf20Sopenharmony_ci __free_page(page); 1148c2ecf20Sopenharmony_ci page = radix_tree_lookup(&brd->brd_pages, idx); 1158c2ecf20Sopenharmony_ci BUG_ON(!page); 1168c2ecf20Sopenharmony_ci BUG_ON(page->index != idx); 1178c2ecf20Sopenharmony_ci } 1188c2ecf20Sopenharmony_ci spin_unlock(&brd->brd_lock); 1198c2ecf20Sopenharmony_ci 1208c2ecf20Sopenharmony_ci radix_tree_preload_end(); 1218c2ecf20Sopenharmony_ci return 0; 1228c2ecf20Sopenharmony_ci} 1238c2ecf20Sopenharmony_ci 1248c2ecf20Sopenharmony_ci/* 1258c2ecf20Sopenharmony_ci * Free all backing store pages and radix tree. This must only be called when 1268c2ecf20Sopenharmony_ci * there are no other users of the device. 1278c2ecf20Sopenharmony_ci */ 1288c2ecf20Sopenharmony_ci#define FREE_BATCH 16 1298c2ecf20Sopenharmony_cistatic void brd_free_pages(struct brd_device *brd) 1308c2ecf20Sopenharmony_ci{ 1318c2ecf20Sopenharmony_ci unsigned long pos = 0; 1328c2ecf20Sopenharmony_ci struct page *pages[FREE_BATCH]; 1338c2ecf20Sopenharmony_ci int nr_pages; 1348c2ecf20Sopenharmony_ci 1358c2ecf20Sopenharmony_ci do { 1368c2ecf20Sopenharmony_ci int i; 1378c2ecf20Sopenharmony_ci 1388c2ecf20Sopenharmony_ci nr_pages = radix_tree_gang_lookup(&brd->brd_pages, 1398c2ecf20Sopenharmony_ci (void **)pages, pos, FREE_BATCH); 1408c2ecf20Sopenharmony_ci 1418c2ecf20Sopenharmony_ci for (i = 0; i < nr_pages; i++) { 1428c2ecf20Sopenharmony_ci void *ret; 1438c2ecf20Sopenharmony_ci 1448c2ecf20Sopenharmony_ci BUG_ON(pages[i]->index < pos); 1458c2ecf20Sopenharmony_ci pos = pages[i]->index; 1468c2ecf20Sopenharmony_ci ret = radix_tree_delete(&brd->brd_pages, pos); 1478c2ecf20Sopenharmony_ci BUG_ON(!ret || ret != pages[i]); 1488c2ecf20Sopenharmony_ci __free_page(pages[i]); 1498c2ecf20Sopenharmony_ci } 1508c2ecf20Sopenharmony_ci 1518c2ecf20Sopenharmony_ci pos++; 1528c2ecf20Sopenharmony_ci 1538c2ecf20Sopenharmony_ci /* 1548c2ecf20Sopenharmony_ci * It takes 3.4 seconds to remove 80GiB ramdisk. 1558c2ecf20Sopenharmony_ci * So, we need cond_resched to avoid stalling the CPU. 1568c2ecf20Sopenharmony_ci */ 1578c2ecf20Sopenharmony_ci cond_resched(); 1588c2ecf20Sopenharmony_ci 1598c2ecf20Sopenharmony_ci /* 1608c2ecf20Sopenharmony_ci * This assumes radix_tree_gang_lookup always returns as 1618c2ecf20Sopenharmony_ci * many pages as possible. If the radix-tree code changes, 1628c2ecf20Sopenharmony_ci * so will this have to. 1638c2ecf20Sopenharmony_ci */ 1648c2ecf20Sopenharmony_ci } while (nr_pages == FREE_BATCH); 1658c2ecf20Sopenharmony_ci} 1668c2ecf20Sopenharmony_ci 1678c2ecf20Sopenharmony_ci/* 1688c2ecf20Sopenharmony_ci * copy_to_brd_setup must be called before copy_to_brd. It may sleep. 1698c2ecf20Sopenharmony_ci */ 1708c2ecf20Sopenharmony_cistatic int copy_to_brd_setup(struct brd_device *brd, sector_t sector, size_t n) 1718c2ecf20Sopenharmony_ci{ 1728c2ecf20Sopenharmony_ci unsigned int offset = (sector & (PAGE_SECTORS-1)) << SECTOR_SHIFT; 1738c2ecf20Sopenharmony_ci size_t copy; 1748c2ecf20Sopenharmony_ci int ret; 1758c2ecf20Sopenharmony_ci 1768c2ecf20Sopenharmony_ci copy = min_t(size_t, n, PAGE_SIZE - offset); 1778c2ecf20Sopenharmony_ci ret = brd_insert_page(brd, sector); 1788c2ecf20Sopenharmony_ci if (ret) 1798c2ecf20Sopenharmony_ci return ret; 1808c2ecf20Sopenharmony_ci if (copy < n) { 1818c2ecf20Sopenharmony_ci sector += copy >> SECTOR_SHIFT; 1828c2ecf20Sopenharmony_ci ret = brd_insert_page(brd, sector); 1838c2ecf20Sopenharmony_ci } 1848c2ecf20Sopenharmony_ci return ret; 1858c2ecf20Sopenharmony_ci} 1868c2ecf20Sopenharmony_ci 1878c2ecf20Sopenharmony_ci/* 1888c2ecf20Sopenharmony_ci * Copy n bytes from src to the brd starting at sector. Does not sleep. 1898c2ecf20Sopenharmony_ci */ 1908c2ecf20Sopenharmony_cistatic void copy_to_brd(struct brd_device *brd, const void *src, 1918c2ecf20Sopenharmony_ci sector_t sector, size_t n) 1928c2ecf20Sopenharmony_ci{ 1938c2ecf20Sopenharmony_ci struct page *page; 1948c2ecf20Sopenharmony_ci void *dst; 1958c2ecf20Sopenharmony_ci unsigned int offset = (sector & (PAGE_SECTORS-1)) << SECTOR_SHIFT; 1968c2ecf20Sopenharmony_ci size_t copy; 1978c2ecf20Sopenharmony_ci 1988c2ecf20Sopenharmony_ci copy = min_t(size_t, n, PAGE_SIZE - offset); 1998c2ecf20Sopenharmony_ci page = brd_lookup_page(brd, sector); 2008c2ecf20Sopenharmony_ci BUG_ON(!page); 2018c2ecf20Sopenharmony_ci 2028c2ecf20Sopenharmony_ci dst = kmap_atomic(page); 2038c2ecf20Sopenharmony_ci memcpy(dst + offset, src, copy); 2048c2ecf20Sopenharmony_ci kunmap_atomic(dst); 2058c2ecf20Sopenharmony_ci 2068c2ecf20Sopenharmony_ci if (copy < n) { 2078c2ecf20Sopenharmony_ci src += copy; 2088c2ecf20Sopenharmony_ci sector += copy >> SECTOR_SHIFT; 2098c2ecf20Sopenharmony_ci copy = n - copy; 2108c2ecf20Sopenharmony_ci page = brd_lookup_page(brd, sector); 2118c2ecf20Sopenharmony_ci BUG_ON(!page); 2128c2ecf20Sopenharmony_ci 2138c2ecf20Sopenharmony_ci dst = kmap_atomic(page); 2148c2ecf20Sopenharmony_ci memcpy(dst, src, copy); 2158c2ecf20Sopenharmony_ci kunmap_atomic(dst); 2168c2ecf20Sopenharmony_ci } 2178c2ecf20Sopenharmony_ci} 2188c2ecf20Sopenharmony_ci 2198c2ecf20Sopenharmony_ci/* 2208c2ecf20Sopenharmony_ci * Copy n bytes to dst from the brd starting at sector. Does not sleep. 2218c2ecf20Sopenharmony_ci */ 2228c2ecf20Sopenharmony_cistatic void copy_from_brd(void *dst, struct brd_device *brd, 2238c2ecf20Sopenharmony_ci sector_t sector, size_t n) 2248c2ecf20Sopenharmony_ci{ 2258c2ecf20Sopenharmony_ci struct page *page; 2268c2ecf20Sopenharmony_ci void *src; 2278c2ecf20Sopenharmony_ci unsigned int offset = (sector & (PAGE_SECTORS-1)) << SECTOR_SHIFT; 2288c2ecf20Sopenharmony_ci size_t copy; 2298c2ecf20Sopenharmony_ci 2308c2ecf20Sopenharmony_ci copy = min_t(size_t, n, PAGE_SIZE - offset); 2318c2ecf20Sopenharmony_ci page = brd_lookup_page(brd, sector); 2328c2ecf20Sopenharmony_ci if (page) { 2338c2ecf20Sopenharmony_ci src = kmap_atomic(page); 2348c2ecf20Sopenharmony_ci memcpy(dst, src + offset, copy); 2358c2ecf20Sopenharmony_ci kunmap_atomic(src); 2368c2ecf20Sopenharmony_ci } else 2378c2ecf20Sopenharmony_ci memset(dst, 0, copy); 2388c2ecf20Sopenharmony_ci 2398c2ecf20Sopenharmony_ci if (copy < n) { 2408c2ecf20Sopenharmony_ci dst += copy; 2418c2ecf20Sopenharmony_ci sector += copy >> SECTOR_SHIFT; 2428c2ecf20Sopenharmony_ci copy = n - copy; 2438c2ecf20Sopenharmony_ci page = brd_lookup_page(brd, sector); 2448c2ecf20Sopenharmony_ci if (page) { 2458c2ecf20Sopenharmony_ci src = kmap_atomic(page); 2468c2ecf20Sopenharmony_ci memcpy(dst, src, copy); 2478c2ecf20Sopenharmony_ci kunmap_atomic(src); 2488c2ecf20Sopenharmony_ci } else 2498c2ecf20Sopenharmony_ci memset(dst, 0, copy); 2508c2ecf20Sopenharmony_ci } 2518c2ecf20Sopenharmony_ci} 2528c2ecf20Sopenharmony_ci 2538c2ecf20Sopenharmony_ci/* 2548c2ecf20Sopenharmony_ci * Process a single bvec of a bio. 2558c2ecf20Sopenharmony_ci */ 2568c2ecf20Sopenharmony_cistatic int brd_do_bvec(struct brd_device *brd, struct page *page, 2578c2ecf20Sopenharmony_ci unsigned int len, unsigned int off, unsigned int op, 2588c2ecf20Sopenharmony_ci sector_t sector) 2598c2ecf20Sopenharmony_ci{ 2608c2ecf20Sopenharmony_ci void *mem; 2618c2ecf20Sopenharmony_ci int err = 0; 2628c2ecf20Sopenharmony_ci 2638c2ecf20Sopenharmony_ci if (op_is_write(op)) { 2648c2ecf20Sopenharmony_ci err = copy_to_brd_setup(brd, sector, len); 2658c2ecf20Sopenharmony_ci if (err) 2668c2ecf20Sopenharmony_ci goto out; 2678c2ecf20Sopenharmony_ci } 2688c2ecf20Sopenharmony_ci 2698c2ecf20Sopenharmony_ci mem = kmap_atomic(page); 2708c2ecf20Sopenharmony_ci if (!op_is_write(op)) { 2718c2ecf20Sopenharmony_ci copy_from_brd(mem + off, brd, sector, len); 2728c2ecf20Sopenharmony_ci flush_dcache_page(page); 2738c2ecf20Sopenharmony_ci } else { 2748c2ecf20Sopenharmony_ci flush_dcache_page(page); 2758c2ecf20Sopenharmony_ci copy_to_brd(brd, mem + off, sector, len); 2768c2ecf20Sopenharmony_ci } 2778c2ecf20Sopenharmony_ci kunmap_atomic(mem); 2788c2ecf20Sopenharmony_ci 2798c2ecf20Sopenharmony_ciout: 2808c2ecf20Sopenharmony_ci return err; 2818c2ecf20Sopenharmony_ci} 2828c2ecf20Sopenharmony_ci 2838c2ecf20Sopenharmony_cistatic blk_qc_t brd_submit_bio(struct bio *bio) 2848c2ecf20Sopenharmony_ci{ 2858c2ecf20Sopenharmony_ci struct brd_device *brd = bio->bi_disk->private_data; 2868c2ecf20Sopenharmony_ci struct bio_vec bvec; 2878c2ecf20Sopenharmony_ci sector_t sector; 2888c2ecf20Sopenharmony_ci struct bvec_iter iter; 2898c2ecf20Sopenharmony_ci 2908c2ecf20Sopenharmony_ci sector = bio->bi_iter.bi_sector; 2918c2ecf20Sopenharmony_ci if (bio_end_sector(bio) > get_capacity(bio->bi_disk)) 2928c2ecf20Sopenharmony_ci goto io_error; 2938c2ecf20Sopenharmony_ci 2948c2ecf20Sopenharmony_ci bio_for_each_segment(bvec, bio, iter) { 2958c2ecf20Sopenharmony_ci unsigned int len = bvec.bv_len; 2968c2ecf20Sopenharmony_ci int err; 2978c2ecf20Sopenharmony_ci 2988c2ecf20Sopenharmony_ci /* Don't support un-aligned buffer */ 2998c2ecf20Sopenharmony_ci WARN_ON_ONCE((bvec.bv_offset & (SECTOR_SIZE - 1)) || 3008c2ecf20Sopenharmony_ci (len & (SECTOR_SIZE - 1))); 3018c2ecf20Sopenharmony_ci 3028c2ecf20Sopenharmony_ci err = brd_do_bvec(brd, bvec.bv_page, len, bvec.bv_offset, 3038c2ecf20Sopenharmony_ci bio_op(bio), sector); 3048c2ecf20Sopenharmony_ci if (err) 3058c2ecf20Sopenharmony_ci goto io_error; 3068c2ecf20Sopenharmony_ci sector += len >> SECTOR_SHIFT; 3078c2ecf20Sopenharmony_ci } 3088c2ecf20Sopenharmony_ci 3098c2ecf20Sopenharmony_ci bio_endio(bio); 3108c2ecf20Sopenharmony_ci return BLK_QC_T_NONE; 3118c2ecf20Sopenharmony_ciio_error: 3128c2ecf20Sopenharmony_ci bio_io_error(bio); 3138c2ecf20Sopenharmony_ci return BLK_QC_T_NONE; 3148c2ecf20Sopenharmony_ci} 3158c2ecf20Sopenharmony_ci 3168c2ecf20Sopenharmony_cistatic int brd_rw_page(struct block_device *bdev, sector_t sector, 3178c2ecf20Sopenharmony_ci struct page *page, unsigned int op) 3188c2ecf20Sopenharmony_ci{ 3198c2ecf20Sopenharmony_ci struct brd_device *brd = bdev->bd_disk->private_data; 3208c2ecf20Sopenharmony_ci int err; 3218c2ecf20Sopenharmony_ci 3228c2ecf20Sopenharmony_ci if (PageTransHuge(page)) 3238c2ecf20Sopenharmony_ci return -ENOTSUPP; 3248c2ecf20Sopenharmony_ci err = brd_do_bvec(brd, page, PAGE_SIZE, 0, op, sector); 3258c2ecf20Sopenharmony_ci page_endio(page, op_is_write(op), err); 3268c2ecf20Sopenharmony_ci return err; 3278c2ecf20Sopenharmony_ci} 3288c2ecf20Sopenharmony_ci 3298c2ecf20Sopenharmony_cistatic const struct block_device_operations brd_fops = { 3308c2ecf20Sopenharmony_ci .owner = THIS_MODULE, 3318c2ecf20Sopenharmony_ci .submit_bio = brd_submit_bio, 3328c2ecf20Sopenharmony_ci .rw_page = brd_rw_page, 3338c2ecf20Sopenharmony_ci}; 3348c2ecf20Sopenharmony_ci 3358c2ecf20Sopenharmony_ci/* 3368c2ecf20Sopenharmony_ci * And now the modules code and kernel interface. 3378c2ecf20Sopenharmony_ci */ 3388c2ecf20Sopenharmony_cistatic int rd_nr = CONFIG_BLK_DEV_RAM_COUNT; 3398c2ecf20Sopenharmony_cimodule_param(rd_nr, int, 0444); 3408c2ecf20Sopenharmony_ciMODULE_PARM_DESC(rd_nr, "Maximum number of brd devices"); 3418c2ecf20Sopenharmony_ci 3428c2ecf20Sopenharmony_ciunsigned long rd_size = CONFIG_BLK_DEV_RAM_SIZE; 3438c2ecf20Sopenharmony_cimodule_param(rd_size, ulong, 0444); 3448c2ecf20Sopenharmony_ciMODULE_PARM_DESC(rd_size, "Size of each RAM disk in kbytes."); 3458c2ecf20Sopenharmony_ci 3468c2ecf20Sopenharmony_cistatic int max_part = 1; 3478c2ecf20Sopenharmony_cimodule_param(max_part, int, 0444); 3488c2ecf20Sopenharmony_ciMODULE_PARM_DESC(max_part, "Num Minors to reserve between devices"); 3498c2ecf20Sopenharmony_ci 3508c2ecf20Sopenharmony_ciMODULE_LICENSE("GPL"); 3518c2ecf20Sopenharmony_ciMODULE_ALIAS_BLOCKDEV_MAJOR(RAMDISK_MAJOR); 3528c2ecf20Sopenharmony_ciMODULE_ALIAS("rd"); 3538c2ecf20Sopenharmony_ci 3548c2ecf20Sopenharmony_ci#ifndef MODULE 3558c2ecf20Sopenharmony_ci/* Legacy boot options - nonmodular */ 3568c2ecf20Sopenharmony_cistatic int __init ramdisk_size(char *str) 3578c2ecf20Sopenharmony_ci{ 3588c2ecf20Sopenharmony_ci rd_size = simple_strtol(str, NULL, 0); 3598c2ecf20Sopenharmony_ci return 1; 3608c2ecf20Sopenharmony_ci} 3618c2ecf20Sopenharmony_ci__setup("ramdisk_size=", ramdisk_size); 3628c2ecf20Sopenharmony_ci#endif 3638c2ecf20Sopenharmony_ci 3648c2ecf20Sopenharmony_ci/* 3658c2ecf20Sopenharmony_ci * The device scheme is derived from loop.c. Keep them in synch where possible 3668c2ecf20Sopenharmony_ci * (should share code eventually). 3678c2ecf20Sopenharmony_ci */ 3688c2ecf20Sopenharmony_cistatic LIST_HEAD(brd_devices); 3698c2ecf20Sopenharmony_cistatic DEFINE_MUTEX(brd_devices_mutex); 3708c2ecf20Sopenharmony_ci 3718c2ecf20Sopenharmony_cistatic struct brd_device *brd_alloc(int i) 3728c2ecf20Sopenharmony_ci{ 3738c2ecf20Sopenharmony_ci struct brd_device *brd; 3748c2ecf20Sopenharmony_ci struct gendisk *disk; 3758c2ecf20Sopenharmony_ci 3768c2ecf20Sopenharmony_ci brd = kzalloc(sizeof(*brd), GFP_KERNEL); 3778c2ecf20Sopenharmony_ci if (!brd) 3788c2ecf20Sopenharmony_ci goto out; 3798c2ecf20Sopenharmony_ci brd->brd_number = i; 3808c2ecf20Sopenharmony_ci spin_lock_init(&brd->brd_lock); 3818c2ecf20Sopenharmony_ci INIT_RADIX_TREE(&brd->brd_pages, GFP_ATOMIC); 3828c2ecf20Sopenharmony_ci 3838c2ecf20Sopenharmony_ci brd->brd_queue = blk_alloc_queue(NUMA_NO_NODE); 3848c2ecf20Sopenharmony_ci if (!brd->brd_queue) 3858c2ecf20Sopenharmony_ci goto out_free_dev; 3868c2ecf20Sopenharmony_ci 3878c2ecf20Sopenharmony_ci /* This is so fdisk will align partitions on 4k, because of 3888c2ecf20Sopenharmony_ci * direct_access API needing 4k alignment, returning a PFN 3898c2ecf20Sopenharmony_ci * (This is only a problem on very small devices <= 4M, 3908c2ecf20Sopenharmony_ci * otherwise fdisk will align on 1M. Regardless this call 3918c2ecf20Sopenharmony_ci * is harmless) 3928c2ecf20Sopenharmony_ci */ 3938c2ecf20Sopenharmony_ci blk_queue_physical_block_size(brd->brd_queue, PAGE_SIZE); 3948c2ecf20Sopenharmony_ci disk = brd->brd_disk = alloc_disk(max_part); 3958c2ecf20Sopenharmony_ci if (!disk) 3968c2ecf20Sopenharmony_ci goto out_free_queue; 3978c2ecf20Sopenharmony_ci disk->major = RAMDISK_MAJOR; 3988c2ecf20Sopenharmony_ci disk->first_minor = i * max_part; 3998c2ecf20Sopenharmony_ci disk->fops = &brd_fops; 4008c2ecf20Sopenharmony_ci disk->private_data = brd; 4018c2ecf20Sopenharmony_ci disk->flags = GENHD_FL_EXT_DEVT; 4028c2ecf20Sopenharmony_ci sprintf(disk->disk_name, "ram%d", i); 4038c2ecf20Sopenharmony_ci set_capacity(disk, rd_size * 2); 4048c2ecf20Sopenharmony_ci 4058c2ecf20Sopenharmony_ci /* Tell the block layer that this is not a rotational device */ 4068c2ecf20Sopenharmony_ci blk_queue_flag_set(QUEUE_FLAG_NONROT, brd->brd_queue); 4078c2ecf20Sopenharmony_ci blk_queue_flag_clear(QUEUE_FLAG_ADD_RANDOM, brd->brd_queue); 4088c2ecf20Sopenharmony_ci 4098c2ecf20Sopenharmony_ci return brd; 4108c2ecf20Sopenharmony_ci 4118c2ecf20Sopenharmony_ciout_free_queue: 4128c2ecf20Sopenharmony_ci blk_cleanup_queue(brd->brd_queue); 4138c2ecf20Sopenharmony_ciout_free_dev: 4148c2ecf20Sopenharmony_ci kfree(brd); 4158c2ecf20Sopenharmony_ciout: 4168c2ecf20Sopenharmony_ci return NULL; 4178c2ecf20Sopenharmony_ci} 4188c2ecf20Sopenharmony_ci 4198c2ecf20Sopenharmony_cistatic void brd_free(struct brd_device *brd) 4208c2ecf20Sopenharmony_ci{ 4218c2ecf20Sopenharmony_ci put_disk(brd->brd_disk); 4228c2ecf20Sopenharmony_ci blk_cleanup_queue(brd->brd_queue); 4238c2ecf20Sopenharmony_ci brd_free_pages(brd); 4248c2ecf20Sopenharmony_ci kfree(brd); 4258c2ecf20Sopenharmony_ci} 4268c2ecf20Sopenharmony_ci 4278c2ecf20Sopenharmony_cistatic struct brd_device *brd_init_one(int i, bool *new) 4288c2ecf20Sopenharmony_ci{ 4298c2ecf20Sopenharmony_ci struct brd_device *brd; 4308c2ecf20Sopenharmony_ci 4318c2ecf20Sopenharmony_ci *new = false; 4328c2ecf20Sopenharmony_ci list_for_each_entry(brd, &brd_devices, brd_list) { 4338c2ecf20Sopenharmony_ci if (brd->brd_number == i) 4348c2ecf20Sopenharmony_ci goto out; 4358c2ecf20Sopenharmony_ci } 4368c2ecf20Sopenharmony_ci 4378c2ecf20Sopenharmony_ci brd = brd_alloc(i); 4388c2ecf20Sopenharmony_ci if (brd) { 4398c2ecf20Sopenharmony_ci brd->brd_disk->queue = brd->brd_queue; 4408c2ecf20Sopenharmony_ci add_disk(brd->brd_disk); 4418c2ecf20Sopenharmony_ci list_add_tail(&brd->brd_list, &brd_devices); 4428c2ecf20Sopenharmony_ci } 4438c2ecf20Sopenharmony_ci *new = true; 4448c2ecf20Sopenharmony_ciout: 4458c2ecf20Sopenharmony_ci return brd; 4468c2ecf20Sopenharmony_ci} 4478c2ecf20Sopenharmony_ci 4488c2ecf20Sopenharmony_cistatic void brd_del_one(struct brd_device *brd) 4498c2ecf20Sopenharmony_ci{ 4508c2ecf20Sopenharmony_ci list_del(&brd->brd_list); 4518c2ecf20Sopenharmony_ci del_gendisk(brd->brd_disk); 4528c2ecf20Sopenharmony_ci brd_free(brd); 4538c2ecf20Sopenharmony_ci} 4548c2ecf20Sopenharmony_ci 4558c2ecf20Sopenharmony_cistatic struct kobject *brd_probe(dev_t dev, int *part, void *data) 4568c2ecf20Sopenharmony_ci{ 4578c2ecf20Sopenharmony_ci struct brd_device *brd; 4588c2ecf20Sopenharmony_ci struct kobject *kobj; 4598c2ecf20Sopenharmony_ci bool new; 4608c2ecf20Sopenharmony_ci 4618c2ecf20Sopenharmony_ci mutex_lock(&brd_devices_mutex); 4628c2ecf20Sopenharmony_ci brd = brd_init_one(MINOR(dev) / max_part, &new); 4638c2ecf20Sopenharmony_ci kobj = brd ? get_disk_and_module(brd->brd_disk) : NULL; 4648c2ecf20Sopenharmony_ci mutex_unlock(&brd_devices_mutex); 4658c2ecf20Sopenharmony_ci 4668c2ecf20Sopenharmony_ci if (new) 4678c2ecf20Sopenharmony_ci *part = 0; 4688c2ecf20Sopenharmony_ci 4698c2ecf20Sopenharmony_ci return kobj; 4708c2ecf20Sopenharmony_ci} 4718c2ecf20Sopenharmony_ci 4728c2ecf20Sopenharmony_cistatic inline void brd_check_and_reset_par(void) 4738c2ecf20Sopenharmony_ci{ 4748c2ecf20Sopenharmony_ci if (unlikely(!max_part)) 4758c2ecf20Sopenharmony_ci max_part = 1; 4768c2ecf20Sopenharmony_ci 4778c2ecf20Sopenharmony_ci /* 4788c2ecf20Sopenharmony_ci * make sure 'max_part' can be divided exactly by (1U << MINORBITS), 4798c2ecf20Sopenharmony_ci * otherwise, it is possiable to get same dev_t when adding partitions. 4808c2ecf20Sopenharmony_ci */ 4818c2ecf20Sopenharmony_ci if ((1U << MINORBITS) % max_part != 0) 4828c2ecf20Sopenharmony_ci max_part = 1UL << fls(max_part); 4838c2ecf20Sopenharmony_ci 4848c2ecf20Sopenharmony_ci if (max_part > DISK_MAX_PARTS) { 4858c2ecf20Sopenharmony_ci pr_info("brd: max_part can't be larger than %d, reset max_part = %d.\n", 4868c2ecf20Sopenharmony_ci DISK_MAX_PARTS, DISK_MAX_PARTS); 4878c2ecf20Sopenharmony_ci max_part = DISK_MAX_PARTS; 4888c2ecf20Sopenharmony_ci } 4898c2ecf20Sopenharmony_ci} 4908c2ecf20Sopenharmony_ci 4918c2ecf20Sopenharmony_cistatic int __init brd_init(void) 4928c2ecf20Sopenharmony_ci{ 4938c2ecf20Sopenharmony_ci struct brd_device *brd, *next; 4948c2ecf20Sopenharmony_ci int i; 4958c2ecf20Sopenharmony_ci 4968c2ecf20Sopenharmony_ci /* 4978c2ecf20Sopenharmony_ci * brd module now has a feature to instantiate underlying device 4988c2ecf20Sopenharmony_ci * structure on-demand, provided that there is an access dev node. 4998c2ecf20Sopenharmony_ci * 5008c2ecf20Sopenharmony_ci * (1) if rd_nr is specified, create that many upfront. else 5018c2ecf20Sopenharmony_ci * it defaults to CONFIG_BLK_DEV_RAM_COUNT 5028c2ecf20Sopenharmony_ci * (2) User can further extend brd devices by create dev node themselves 5038c2ecf20Sopenharmony_ci * and have kernel automatically instantiate actual device 5048c2ecf20Sopenharmony_ci * on-demand. Example: 5058c2ecf20Sopenharmony_ci * mknod /path/devnod_name b 1 X # 1 is the rd major 5068c2ecf20Sopenharmony_ci * fdisk -l /path/devnod_name 5078c2ecf20Sopenharmony_ci * If (X / max_part) was not already created it will be created 5088c2ecf20Sopenharmony_ci * dynamically. 5098c2ecf20Sopenharmony_ci */ 5108c2ecf20Sopenharmony_ci 5118c2ecf20Sopenharmony_ci if (register_blkdev(RAMDISK_MAJOR, "ramdisk")) 5128c2ecf20Sopenharmony_ci return -EIO; 5138c2ecf20Sopenharmony_ci 5148c2ecf20Sopenharmony_ci brd_check_and_reset_par(); 5158c2ecf20Sopenharmony_ci 5168c2ecf20Sopenharmony_ci for (i = 0; i < rd_nr; i++) { 5178c2ecf20Sopenharmony_ci brd = brd_alloc(i); 5188c2ecf20Sopenharmony_ci if (!brd) 5198c2ecf20Sopenharmony_ci goto out_free; 5208c2ecf20Sopenharmony_ci list_add_tail(&brd->brd_list, &brd_devices); 5218c2ecf20Sopenharmony_ci } 5228c2ecf20Sopenharmony_ci 5238c2ecf20Sopenharmony_ci /* point of no return */ 5248c2ecf20Sopenharmony_ci 5258c2ecf20Sopenharmony_ci list_for_each_entry(brd, &brd_devices, brd_list) { 5268c2ecf20Sopenharmony_ci /* 5278c2ecf20Sopenharmony_ci * associate with queue just before adding disk for 5288c2ecf20Sopenharmony_ci * avoiding to mess up failure path 5298c2ecf20Sopenharmony_ci */ 5308c2ecf20Sopenharmony_ci brd->brd_disk->queue = brd->brd_queue; 5318c2ecf20Sopenharmony_ci add_disk(brd->brd_disk); 5328c2ecf20Sopenharmony_ci } 5338c2ecf20Sopenharmony_ci 5348c2ecf20Sopenharmony_ci blk_register_region(MKDEV(RAMDISK_MAJOR, 0), 1UL << MINORBITS, 5358c2ecf20Sopenharmony_ci THIS_MODULE, brd_probe, NULL, NULL); 5368c2ecf20Sopenharmony_ci 5378c2ecf20Sopenharmony_ci pr_info("brd: module loaded\n"); 5388c2ecf20Sopenharmony_ci return 0; 5398c2ecf20Sopenharmony_ci 5408c2ecf20Sopenharmony_ciout_free: 5418c2ecf20Sopenharmony_ci list_for_each_entry_safe(brd, next, &brd_devices, brd_list) { 5428c2ecf20Sopenharmony_ci list_del(&brd->brd_list); 5438c2ecf20Sopenharmony_ci brd_free(brd); 5448c2ecf20Sopenharmony_ci } 5458c2ecf20Sopenharmony_ci unregister_blkdev(RAMDISK_MAJOR, "ramdisk"); 5468c2ecf20Sopenharmony_ci 5478c2ecf20Sopenharmony_ci pr_info("brd: module NOT loaded !!!\n"); 5488c2ecf20Sopenharmony_ci return -ENOMEM; 5498c2ecf20Sopenharmony_ci} 5508c2ecf20Sopenharmony_ci 5518c2ecf20Sopenharmony_cistatic void __exit brd_exit(void) 5528c2ecf20Sopenharmony_ci{ 5538c2ecf20Sopenharmony_ci struct brd_device *brd, *next; 5548c2ecf20Sopenharmony_ci 5558c2ecf20Sopenharmony_ci list_for_each_entry_safe(brd, next, &brd_devices, brd_list) 5568c2ecf20Sopenharmony_ci brd_del_one(brd); 5578c2ecf20Sopenharmony_ci 5588c2ecf20Sopenharmony_ci blk_unregister_region(MKDEV(RAMDISK_MAJOR, 0), 1UL << MINORBITS); 5598c2ecf20Sopenharmony_ci unregister_blkdev(RAMDISK_MAJOR, "ramdisk"); 5608c2ecf20Sopenharmony_ci 5618c2ecf20Sopenharmony_ci pr_info("brd: module unloaded\n"); 5628c2ecf20Sopenharmony_ci} 5638c2ecf20Sopenharmony_ci 5648c2ecf20Sopenharmony_cimodule_init(brd_init); 5658c2ecf20Sopenharmony_cimodule_exit(brd_exit); 5668c2ecf20Sopenharmony_ci 567