xref: /kernel/linux/linux-5.10/drivers/block/brd.c (revision 8c2ecf20)
18c2ecf20Sopenharmony_ci// SPDX-License-Identifier: GPL-2.0-only
28c2ecf20Sopenharmony_ci/*
38c2ecf20Sopenharmony_ci * Ram backed block device driver.
48c2ecf20Sopenharmony_ci *
58c2ecf20Sopenharmony_ci * Copyright (C) 2007 Nick Piggin
68c2ecf20Sopenharmony_ci * Copyright (C) 2007 Novell Inc.
78c2ecf20Sopenharmony_ci *
88c2ecf20Sopenharmony_ci * Parts derived from drivers/block/rd.c, and drivers/block/loop.c, copyright
98c2ecf20Sopenharmony_ci * of their respective owners.
108c2ecf20Sopenharmony_ci */
118c2ecf20Sopenharmony_ci
128c2ecf20Sopenharmony_ci#include <linux/init.h>
138c2ecf20Sopenharmony_ci#include <linux/initrd.h>
148c2ecf20Sopenharmony_ci#include <linux/module.h>
158c2ecf20Sopenharmony_ci#include <linux/moduleparam.h>
168c2ecf20Sopenharmony_ci#include <linux/major.h>
178c2ecf20Sopenharmony_ci#include <linux/blkdev.h>
188c2ecf20Sopenharmony_ci#include <linux/bio.h>
198c2ecf20Sopenharmony_ci#include <linux/highmem.h>
208c2ecf20Sopenharmony_ci#include <linux/mutex.h>
218c2ecf20Sopenharmony_ci#include <linux/radix-tree.h>
228c2ecf20Sopenharmony_ci#include <linux/fs.h>
238c2ecf20Sopenharmony_ci#include <linux/slab.h>
248c2ecf20Sopenharmony_ci#include <linux/backing-dev.h>
258c2ecf20Sopenharmony_ci
268c2ecf20Sopenharmony_ci#include <linux/uaccess.h>
278c2ecf20Sopenharmony_ci
/* Shift that converts a sector number into the index of the page holding it. */
#define PAGE_SECTORS_SHIFT	(PAGE_SHIFT - SECTOR_SHIFT)
/* Sectors per page; (PAGE_SECTORS - 1) masks a sector's position inside its page. */
#define PAGE_SECTORS		(1 << PAGE_SECTORS_SHIFT)
308c2ecf20Sopenharmony_ci
/*
 * Each block ramdisk device has a radix_tree brd_pages of pages that stores
 * the pages containing the block device's contents. A brd page's ->index is
 * its offset in PAGE_SIZE units. This is similar to, but in no way connected
 * with, the kernel's pagecache or buffer cache (which sit above our block
 * device).
 */
struct brd_device {
	/* Device index: gives the "ram%d" disk name and, times max_part, the first minor. */
	int		brd_number;

	/* Queue allocated in brd_alloc(); assigned to brd_disk->queue just before add_disk(). */
	struct request_queue	*brd_queue;
	/* The gendisk for this device; its private_data points back at this structure. */
	struct gendisk		*brd_disk;
	/* Link in the file-scope brd_devices list. */
	struct list_head	brd_list;

	/*
	 * Backing store of pages and lock to protect it. This is the contents
	 * of the block device.
	 */
	/* Taken around radix_tree_insert() in brd_insert_page(). */
	spinlock_t		brd_lock;
	/* Maps page index (sector >> PAGE_SECTORS_SHIFT) to struct page. */
	struct radix_tree_root	brd_pages;
};
528c2ecf20Sopenharmony_ci
538c2ecf20Sopenharmony_ci/*
548c2ecf20Sopenharmony_ci * Look up and return a brd's page for a given sector.
558c2ecf20Sopenharmony_ci */
568c2ecf20Sopenharmony_cistatic struct page *brd_lookup_page(struct brd_device *brd, sector_t sector)
578c2ecf20Sopenharmony_ci{
588c2ecf20Sopenharmony_ci	pgoff_t idx;
598c2ecf20Sopenharmony_ci	struct page *page;
608c2ecf20Sopenharmony_ci
618c2ecf20Sopenharmony_ci	/*
628c2ecf20Sopenharmony_ci	 * The page lifetime is protected by the fact that we have opened the
638c2ecf20Sopenharmony_ci	 * device node -- brd pages will never be deleted under us, so we
648c2ecf20Sopenharmony_ci	 * don't need any further locking or refcounting.
658c2ecf20Sopenharmony_ci	 *
668c2ecf20Sopenharmony_ci	 * This is strictly true for the radix-tree nodes as well (ie. we
678c2ecf20Sopenharmony_ci	 * don't actually need the rcu_read_lock()), however that is not a
688c2ecf20Sopenharmony_ci	 * documented feature of the radix-tree API so it is better to be
698c2ecf20Sopenharmony_ci	 * safe here (we don't have total exclusion from radix tree updates
708c2ecf20Sopenharmony_ci	 * here, only deletes).
718c2ecf20Sopenharmony_ci	 */
728c2ecf20Sopenharmony_ci	rcu_read_lock();
738c2ecf20Sopenharmony_ci	idx = sector >> PAGE_SECTORS_SHIFT; /* sector to page index */
748c2ecf20Sopenharmony_ci	page = radix_tree_lookup(&brd->brd_pages, idx);
758c2ecf20Sopenharmony_ci	rcu_read_unlock();
768c2ecf20Sopenharmony_ci
778c2ecf20Sopenharmony_ci	BUG_ON(page && page->index != idx);
788c2ecf20Sopenharmony_ci
798c2ecf20Sopenharmony_ci	return page;
808c2ecf20Sopenharmony_ci}
818c2ecf20Sopenharmony_ci
828c2ecf20Sopenharmony_ci/*
838c2ecf20Sopenharmony_ci * Insert a new page for a given sector, if one does not already exist.
848c2ecf20Sopenharmony_ci */
858c2ecf20Sopenharmony_cistatic int brd_insert_page(struct brd_device *brd, sector_t sector)
868c2ecf20Sopenharmony_ci{
878c2ecf20Sopenharmony_ci	pgoff_t idx;
888c2ecf20Sopenharmony_ci	struct page *page;
898c2ecf20Sopenharmony_ci	gfp_t gfp_flags;
908c2ecf20Sopenharmony_ci
918c2ecf20Sopenharmony_ci	page = brd_lookup_page(brd, sector);
928c2ecf20Sopenharmony_ci	if (page)
938c2ecf20Sopenharmony_ci		return 0;
948c2ecf20Sopenharmony_ci
958c2ecf20Sopenharmony_ci	/*
968c2ecf20Sopenharmony_ci	 * Must use NOIO because we don't want to recurse back into the
978c2ecf20Sopenharmony_ci	 * block or filesystem layers from page reclaim.
988c2ecf20Sopenharmony_ci	 */
998c2ecf20Sopenharmony_ci	gfp_flags = GFP_NOIO | __GFP_ZERO | __GFP_HIGHMEM;
1008c2ecf20Sopenharmony_ci	page = alloc_page(gfp_flags);
1018c2ecf20Sopenharmony_ci	if (!page)
1028c2ecf20Sopenharmony_ci		return -ENOMEM;
1038c2ecf20Sopenharmony_ci
1048c2ecf20Sopenharmony_ci	if (radix_tree_preload(GFP_NOIO)) {
1058c2ecf20Sopenharmony_ci		__free_page(page);
1068c2ecf20Sopenharmony_ci		return -ENOMEM;
1078c2ecf20Sopenharmony_ci	}
1088c2ecf20Sopenharmony_ci
1098c2ecf20Sopenharmony_ci	spin_lock(&brd->brd_lock);
1108c2ecf20Sopenharmony_ci	idx = sector >> PAGE_SECTORS_SHIFT;
1118c2ecf20Sopenharmony_ci	page->index = idx;
1128c2ecf20Sopenharmony_ci	if (radix_tree_insert(&brd->brd_pages, idx, page)) {
1138c2ecf20Sopenharmony_ci		__free_page(page);
1148c2ecf20Sopenharmony_ci		page = radix_tree_lookup(&brd->brd_pages, idx);
1158c2ecf20Sopenharmony_ci		BUG_ON(!page);
1168c2ecf20Sopenharmony_ci		BUG_ON(page->index != idx);
1178c2ecf20Sopenharmony_ci	}
1188c2ecf20Sopenharmony_ci	spin_unlock(&brd->brd_lock);
1198c2ecf20Sopenharmony_ci
1208c2ecf20Sopenharmony_ci	radix_tree_preload_end();
1218c2ecf20Sopenharmony_ci	return 0;
1228c2ecf20Sopenharmony_ci}
1238c2ecf20Sopenharmony_ci
1248c2ecf20Sopenharmony_ci/*
1258c2ecf20Sopenharmony_ci * Free all backing store pages and radix tree. This must only be called when
1268c2ecf20Sopenharmony_ci * there are no other users of the device.
1278c2ecf20Sopenharmony_ci */
1288c2ecf20Sopenharmony_ci#define FREE_BATCH 16
1298c2ecf20Sopenharmony_cistatic void brd_free_pages(struct brd_device *brd)
1308c2ecf20Sopenharmony_ci{
1318c2ecf20Sopenharmony_ci	unsigned long pos = 0;
1328c2ecf20Sopenharmony_ci	struct page *pages[FREE_BATCH];
1338c2ecf20Sopenharmony_ci	int nr_pages;
1348c2ecf20Sopenharmony_ci
1358c2ecf20Sopenharmony_ci	do {
1368c2ecf20Sopenharmony_ci		int i;
1378c2ecf20Sopenharmony_ci
1388c2ecf20Sopenharmony_ci		nr_pages = radix_tree_gang_lookup(&brd->brd_pages,
1398c2ecf20Sopenharmony_ci				(void **)pages, pos, FREE_BATCH);
1408c2ecf20Sopenharmony_ci
1418c2ecf20Sopenharmony_ci		for (i = 0; i < nr_pages; i++) {
1428c2ecf20Sopenharmony_ci			void *ret;
1438c2ecf20Sopenharmony_ci
1448c2ecf20Sopenharmony_ci			BUG_ON(pages[i]->index < pos);
1458c2ecf20Sopenharmony_ci			pos = pages[i]->index;
1468c2ecf20Sopenharmony_ci			ret = radix_tree_delete(&brd->brd_pages, pos);
1478c2ecf20Sopenharmony_ci			BUG_ON(!ret || ret != pages[i]);
1488c2ecf20Sopenharmony_ci			__free_page(pages[i]);
1498c2ecf20Sopenharmony_ci		}
1508c2ecf20Sopenharmony_ci
1518c2ecf20Sopenharmony_ci		pos++;
1528c2ecf20Sopenharmony_ci
1538c2ecf20Sopenharmony_ci		/*
1548c2ecf20Sopenharmony_ci		 * It takes 3.4 seconds to remove 80GiB ramdisk.
1558c2ecf20Sopenharmony_ci		 * So, we need cond_resched to avoid stalling the CPU.
1568c2ecf20Sopenharmony_ci		 */
1578c2ecf20Sopenharmony_ci		cond_resched();
1588c2ecf20Sopenharmony_ci
1598c2ecf20Sopenharmony_ci		/*
1608c2ecf20Sopenharmony_ci		 * This assumes radix_tree_gang_lookup always returns as
1618c2ecf20Sopenharmony_ci		 * many pages as possible. If the radix-tree code changes,
1628c2ecf20Sopenharmony_ci		 * so will this have to.
1638c2ecf20Sopenharmony_ci		 */
1648c2ecf20Sopenharmony_ci	} while (nr_pages == FREE_BATCH);
1658c2ecf20Sopenharmony_ci}
1668c2ecf20Sopenharmony_ci
1678c2ecf20Sopenharmony_ci/*
1688c2ecf20Sopenharmony_ci * copy_to_brd_setup must be called before copy_to_brd. It may sleep.
1698c2ecf20Sopenharmony_ci */
1708c2ecf20Sopenharmony_cistatic int copy_to_brd_setup(struct brd_device *brd, sector_t sector, size_t n)
1718c2ecf20Sopenharmony_ci{
1728c2ecf20Sopenharmony_ci	unsigned int offset = (sector & (PAGE_SECTORS-1)) << SECTOR_SHIFT;
1738c2ecf20Sopenharmony_ci	size_t copy;
1748c2ecf20Sopenharmony_ci	int ret;
1758c2ecf20Sopenharmony_ci
1768c2ecf20Sopenharmony_ci	copy = min_t(size_t, n, PAGE_SIZE - offset);
1778c2ecf20Sopenharmony_ci	ret = brd_insert_page(brd, sector);
1788c2ecf20Sopenharmony_ci	if (ret)
1798c2ecf20Sopenharmony_ci		return ret;
1808c2ecf20Sopenharmony_ci	if (copy < n) {
1818c2ecf20Sopenharmony_ci		sector += copy >> SECTOR_SHIFT;
1828c2ecf20Sopenharmony_ci		ret = brd_insert_page(brd, sector);
1838c2ecf20Sopenharmony_ci	}
1848c2ecf20Sopenharmony_ci	return ret;
1858c2ecf20Sopenharmony_ci}
1868c2ecf20Sopenharmony_ci
1878c2ecf20Sopenharmony_ci/*
1888c2ecf20Sopenharmony_ci * Copy n bytes from src to the brd starting at sector. Does not sleep.
1898c2ecf20Sopenharmony_ci */
1908c2ecf20Sopenharmony_cistatic void copy_to_brd(struct brd_device *brd, const void *src,
1918c2ecf20Sopenharmony_ci			sector_t sector, size_t n)
1928c2ecf20Sopenharmony_ci{
1938c2ecf20Sopenharmony_ci	struct page *page;
1948c2ecf20Sopenharmony_ci	void *dst;
1958c2ecf20Sopenharmony_ci	unsigned int offset = (sector & (PAGE_SECTORS-1)) << SECTOR_SHIFT;
1968c2ecf20Sopenharmony_ci	size_t copy;
1978c2ecf20Sopenharmony_ci
1988c2ecf20Sopenharmony_ci	copy = min_t(size_t, n, PAGE_SIZE - offset);
1998c2ecf20Sopenharmony_ci	page = brd_lookup_page(brd, sector);
2008c2ecf20Sopenharmony_ci	BUG_ON(!page);
2018c2ecf20Sopenharmony_ci
2028c2ecf20Sopenharmony_ci	dst = kmap_atomic(page);
2038c2ecf20Sopenharmony_ci	memcpy(dst + offset, src, copy);
2048c2ecf20Sopenharmony_ci	kunmap_atomic(dst);
2058c2ecf20Sopenharmony_ci
2068c2ecf20Sopenharmony_ci	if (copy < n) {
2078c2ecf20Sopenharmony_ci		src += copy;
2088c2ecf20Sopenharmony_ci		sector += copy >> SECTOR_SHIFT;
2098c2ecf20Sopenharmony_ci		copy = n - copy;
2108c2ecf20Sopenharmony_ci		page = brd_lookup_page(brd, sector);
2118c2ecf20Sopenharmony_ci		BUG_ON(!page);
2128c2ecf20Sopenharmony_ci
2138c2ecf20Sopenharmony_ci		dst = kmap_atomic(page);
2148c2ecf20Sopenharmony_ci		memcpy(dst, src, copy);
2158c2ecf20Sopenharmony_ci		kunmap_atomic(dst);
2168c2ecf20Sopenharmony_ci	}
2178c2ecf20Sopenharmony_ci}
2188c2ecf20Sopenharmony_ci
2198c2ecf20Sopenharmony_ci/*
2208c2ecf20Sopenharmony_ci * Copy n bytes to dst from the brd starting at sector. Does not sleep.
2218c2ecf20Sopenharmony_ci */
2228c2ecf20Sopenharmony_cistatic void copy_from_brd(void *dst, struct brd_device *brd,
2238c2ecf20Sopenharmony_ci			sector_t sector, size_t n)
2248c2ecf20Sopenharmony_ci{
2258c2ecf20Sopenharmony_ci	struct page *page;
2268c2ecf20Sopenharmony_ci	void *src;
2278c2ecf20Sopenharmony_ci	unsigned int offset = (sector & (PAGE_SECTORS-1)) << SECTOR_SHIFT;
2288c2ecf20Sopenharmony_ci	size_t copy;
2298c2ecf20Sopenharmony_ci
2308c2ecf20Sopenharmony_ci	copy = min_t(size_t, n, PAGE_SIZE - offset);
2318c2ecf20Sopenharmony_ci	page = brd_lookup_page(brd, sector);
2328c2ecf20Sopenharmony_ci	if (page) {
2338c2ecf20Sopenharmony_ci		src = kmap_atomic(page);
2348c2ecf20Sopenharmony_ci		memcpy(dst, src + offset, copy);
2358c2ecf20Sopenharmony_ci		kunmap_atomic(src);
2368c2ecf20Sopenharmony_ci	} else
2378c2ecf20Sopenharmony_ci		memset(dst, 0, copy);
2388c2ecf20Sopenharmony_ci
2398c2ecf20Sopenharmony_ci	if (copy < n) {
2408c2ecf20Sopenharmony_ci		dst += copy;
2418c2ecf20Sopenharmony_ci		sector += copy >> SECTOR_SHIFT;
2428c2ecf20Sopenharmony_ci		copy = n - copy;
2438c2ecf20Sopenharmony_ci		page = brd_lookup_page(brd, sector);
2448c2ecf20Sopenharmony_ci		if (page) {
2458c2ecf20Sopenharmony_ci			src = kmap_atomic(page);
2468c2ecf20Sopenharmony_ci			memcpy(dst, src, copy);
2478c2ecf20Sopenharmony_ci			kunmap_atomic(src);
2488c2ecf20Sopenharmony_ci		} else
2498c2ecf20Sopenharmony_ci			memset(dst, 0, copy);
2508c2ecf20Sopenharmony_ci	}
2518c2ecf20Sopenharmony_ci}
2528c2ecf20Sopenharmony_ci
2538c2ecf20Sopenharmony_ci/*
2548c2ecf20Sopenharmony_ci * Process a single bvec of a bio.
2558c2ecf20Sopenharmony_ci */
2568c2ecf20Sopenharmony_cistatic int brd_do_bvec(struct brd_device *brd, struct page *page,
2578c2ecf20Sopenharmony_ci			unsigned int len, unsigned int off, unsigned int op,
2588c2ecf20Sopenharmony_ci			sector_t sector)
2598c2ecf20Sopenharmony_ci{
2608c2ecf20Sopenharmony_ci	void *mem;
2618c2ecf20Sopenharmony_ci	int err = 0;
2628c2ecf20Sopenharmony_ci
2638c2ecf20Sopenharmony_ci	if (op_is_write(op)) {
2648c2ecf20Sopenharmony_ci		err = copy_to_brd_setup(brd, sector, len);
2658c2ecf20Sopenharmony_ci		if (err)
2668c2ecf20Sopenharmony_ci			goto out;
2678c2ecf20Sopenharmony_ci	}
2688c2ecf20Sopenharmony_ci
2698c2ecf20Sopenharmony_ci	mem = kmap_atomic(page);
2708c2ecf20Sopenharmony_ci	if (!op_is_write(op)) {
2718c2ecf20Sopenharmony_ci		copy_from_brd(mem + off, brd, sector, len);
2728c2ecf20Sopenharmony_ci		flush_dcache_page(page);
2738c2ecf20Sopenharmony_ci	} else {
2748c2ecf20Sopenharmony_ci		flush_dcache_page(page);
2758c2ecf20Sopenharmony_ci		copy_to_brd(brd, mem + off, sector, len);
2768c2ecf20Sopenharmony_ci	}
2778c2ecf20Sopenharmony_ci	kunmap_atomic(mem);
2788c2ecf20Sopenharmony_ci
2798c2ecf20Sopenharmony_ciout:
2808c2ecf20Sopenharmony_ci	return err;
2818c2ecf20Sopenharmony_ci}
2828c2ecf20Sopenharmony_ci
2838c2ecf20Sopenharmony_cistatic blk_qc_t brd_submit_bio(struct bio *bio)
2848c2ecf20Sopenharmony_ci{
2858c2ecf20Sopenharmony_ci	struct brd_device *brd = bio->bi_disk->private_data;
2868c2ecf20Sopenharmony_ci	struct bio_vec bvec;
2878c2ecf20Sopenharmony_ci	sector_t sector;
2888c2ecf20Sopenharmony_ci	struct bvec_iter iter;
2898c2ecf20Sopenharmony_ci
2908c2ecf20Sopenharmony_ci	sector = bio->bi_iter.bi_sector;
2918c2ecf20Sopenharmony_ci	if (bio_end_sector(bio) > get_capacity(bio->bi_disk))
2928c2ecf20Sopenharmony_ci		goto io_error;
2938c2ecf20Sopenharmony_ci
2948c2ecf20Sopenharmony_ci	bio_for_each_segment(bvec, bio, iter) {
2958c2ecf20Sopenharmony_ci		unsigned int len = bvec.bv_len;
2968c2ecf20Sopenharmony_ci		int err;
2978c2ecf20Sopenharmony_ci
2988c2ecf20Sopenharmony_ci		/* Don't support un-aligned buffer */
2998c2ecf20Sopenharmony_ci		WARN_ON_ONCE((bvec.bv_offset & (SECTOR_SIZE - 1)) ||
3008c2ecf20Sopenharmony_ci				(len & (SECTOR_SIZE - 1)));
3018c2ecf20Sopenharmony_ci
3028c2ecf20Sopenharmony_ci		err = brd_do_bvec(brd, bvec.bv_page, len, bvec.bv_offset,
3038c2ecf20Sopenharmony_ci				  bio_op(bio), sector);
3048c2ecf20Sopenharmony_ci		if (err)
3058c2ecf20Sopenharmony_ci			goto io_error;
3068c2ecf20Sopenharmony_ci		sector += len >> SECTOR_SHIFT;
3078c2ecf20Sopenharmony_ci	}
3088c2ecf20Sopenharmony_ci
3098c2ecf20Sopenharmony_ci	bio_endio(bio);
3108c2ecf20Sopenharmony_ci	return BLK_QC_T_NONE;
3118c2ecf20Sopenharmony_ciio_error:
3128c2ecf20Sopenharmony_ci	bio_io_error(bio);
3138c2ecf20Sopenharmony_ci	return BLK_QC_T_NONE;
3148c2ecf20Sopenharmony_ci}
3158c2ecf20Sopenharmony_ci
3168c2ecf20Sopenharmony_cistatic int brd_rw_page(struct block_device *bdev, sector_t sector,
3178c2ecf20Sopenharmony_ci		       struct page *page, unsigned int op)
3188c2ecf20Sopenharmony_ci{
3198c2ecf20Sopenharmony_ci	struct brd_device *brd = bdev->bd_disk->private_data;
3208c2ecf20Sopenharmony_ci	int err;
3218c2ecf20Sopenharmony_ci
3228c2ecf20Sopenharmony_ci	if (PageTransHuge(page))
3238c2ecf20Sopenharmony_ci		return -ENOTSUPP;
3248c2ecf20Sopenharmony_ci	err = brd_do_bvec(brd, page, PAGE_SIZE, 0, op, sector);
3258c2ecf20Sopenharmony_ci	page_endio(page, op_is_write(op), err);
3268c2ecf20Sopenharmony_ci	return err;
3278c2ecf20Sopenharmony_ci}
3288c2ecf20Sopenharmony_ci
/*
 * Block device operations installed on every brd gendisk (see brd_alloc()).
 */
static const struct block_device_operations brd_fops = {
	.owner =		THIS_MODULE,
	/* bio entry point */
	.submit_bio =		brd_submit_bio,
	/* single-page read/write entry point */
	.rw_page =		brd_rw_page,
};
3348c2ecf20Sopenharmony_ci
/*
 * And now the modules code and kernel interface.
 */
/* Number of devices brd_init() creates up front. */
static int rd_nr = CONFIG_BLK_DEV_RAM_COUNT;
module_param(rd_nr, int, 0444);
MODULE_PARM_DESC(rd_nr, "Maximum number of brd devices");

/*
 * Size of each disk in kbytes; brd_alloc() passes rd_size * 2 to
 * set_capacity(). NOTE(review): not static -- presumably referenced from
 * another translation unit; confirm before changing linkage.
 */
unsigned long rd_size = CONFIG_BLK_DEV_RAM_SIZE;
module_param(rd_size, ulong, 0444);
MODULE_PARM_DESC(rd_size, "Size of each RAM disk in kbytes.");

/*
 * Minors reserved per device: device i gets first_minor = i * max_part.
 * Sanitised by brd_check_and_reset_par() at init time.
 */
static int max_part = 1;
module_param(max_part, int, 0444);
MODULE_PARM_DESC(max_part, "Num Minors to reserve between devices");

MODULE_LICENSE("GPL");
MODULE_ALIAS_BLOCKDEV_MAJOR(RAMDISK_MAJOR);
MODULE_ALIAS("rd");
3538c2ecf20Sopenharmony_ci
3548c2ecf20Sopenharmony_ci#ifndef MODULE
3558c2ecf20Sopenharmony_ci/* Legacy boot options - nonmodular */
3568c2ecf20Sopenharmony_cistatic int __init ramdisk_size(char *str)
3578c2ecf20Sopenharmony_ci{
3588c2ecf20Sopenharmony_ci	rd_size = simple_strtol(str, NULL, 0);
3598c2ecf20Sopenharmony_ci	return 1;
3608c2ecf20Sopenharmony_ci}
3618c2ecf20Sopenharmony_ci__setup("ramdisk_size=", ramdisk_size);
3628c2ecf20Sopenharmony_ci#endif
3638c2ecf20Sopenharmony_ci
/*
 * The device scheme is derived from loop.c. Keep them in synch where possible
 * (should share code eventually).
 */
/* All brd devices, linked through brd_device.brd_list. */
static LIST_HEAD(brd_devices);
/* Held by brd_probe() around on-demand lookup/creation via brd_init_one(). */
static DEFINE_MUTEX(brd_devices_mutex);
3708c2ecf20Sopenharmony_ci
3718c2ecf20Sopenharmony_cistatic struct brd_device *brd_alloc(int i)
3728c2ecf20Sopenharmony_ci{
3738c2ecf20Sopenharmony_ci	struct brd_device *brd;
3748c2ecf20Sopenharmony_ci	struct gendisk *disk;
3758c2ecf20Sopenharmony_ci
3768c2ecf20Sopenharmony_ci	brd = kzalloc(sizeof(*brd), GFP_KERNEL);
3778c2ecf20Sopenharmony_ci	if (!brd)
3788c2ecf20Sopenharmony_ci		goto out;
3798c2ecf20Sopenharmony_ci	brd->brd_number		= i;
3808c2ecf20Sopenharmony_ci	spin_lock_init(&brd->brd_lock);
3818c2ecf20Sopenharmony_ci	INIT_RADIX_TREE(&brd->brd_pages, GFP_ATOMIC);
3828c2ecf20Sopenharmony_ci
3838c2ecf20Sopenharmony_ci	brd->brd_queue = blk_alloc_queue(NUMA_NO_NODE);
3848c2ecf20Sopenharmony_ci	if (!brd->brd_queue)
3858c2ecf20Sopenharmony_ci		goto out_free_dev;
3868c2ecf20Sopenharmony_ci
3878c2ecf20Sopenharmony_ci	/* This is so fdisk will align partitions on 4k, because of
3888c2ecf20Sopenharmony_ci	 * direct_access API needing 4k alignment, returning a PFN
3898c2ecf20Sopenharmony_ci	 * (This is only a problem on very small devices <= 4M,
3908c2ecf20Sopenharmony_ci	 *  otherwise fdisk will align on 1M. Regardless this call
3918c2ecf20Sopenharmony_ci	 *  is harmless)
3928c2ecf20Sopenharmony_ci	 */
3938c2ecf20Sopenharmony_ci	blk_queue_physical_block_size(brd->brd_queue, PAGE_SIZE);
3948c2ecf20Sopenharmony_ci	disk = brd->brd_disk = alloc_disk(max_part);
3958c2ecf20Sopenharmony_ci	if (!disk)
3968c2ecf20Sopenharmony_ci		goto out_free_queue;
3978c2ecf20Sopenharmony_ci	disk->major		= RAMDISK_MAJOR;
3988c2ecf20Sopenharmony_ci	disk->first_minor	= i * max_part;
3998c2ecf20Sopenharmony_ci	disk->fops		= &brd_fops;
4008c2ecf20Sopenharmony_ci	disk->private_data	= brd;
4018c2ecf20Sopenharmony_ci	disk->flags		= GENHD_FL_EXT_DEVT;
4028c2ecf20Sopenharmony_ci	sprintf(disk->disk_name, "ram%d", i);
4038c2ecf20Sopenharmony_ci	set_capacity(disk, rd_size * 2);
4048c2ecf20Sopenharmony_ci
4058c2ecf20Sopenharmony_ci	/* Tell the block layer that this is not a rotational device */
4068c2ecf20Sopenharmony_ci	blk_queue_flag_set(QUEUE_FLAG_NONROT, brd->brd_queue);
4078c2ecf20Sopenharmony_ci	blk_queue_flag_clear(QUEUE_FLAG_ADD_RANDOM, brd->brd_queue);
4088c2ecf20Sopenharmony_ci
4098c2ecf20Sopenharmony_ci	return brd;
4108c2ecf20Sopenharmony_ci
4118c2ecf20Sopenharmony_ciout_free_queue:
4128c2ecf20Sopenharmony_ci	blk_cleanup_queue(brd->brd_queue);
4138c2ecf20Sopenharmony_ciout_free_dev:
4148c2ecf20Sopenharmony_ci	kfree(brd);
4158c2ecf20Sopenharmony_ciout:
4168c2ecf20Sopenharmony_ci	return NULL;
4178c2ecf20Sopenharmony_ci}
4188c2ecf20Sopenharmony_ci
/*
 * Release everything brd_alloc() set up for @brd, plus any backing pages
 * that were inserted since. Does not unlink @brd from brd_devices and does
 * not call del_gendisk(); callers handle those first where needed.
 */
static void brd_free(struct brd_device *brd)
{
	put_disk(brd->brd_disk);
	blk_cleanup_queue(brd->brd_queue);
	/* Drop the ramdisk contents before the structure that owns the tree. */
	brd_free_pages(brd);
	kfree(brd);
}
4268c2ecf20Sopenharmony_ci
4278c2ecf20Sopenharmony_cistatic struct brd_device *brd_init_one(int i, bool *new)
4288c2ecf20Sopenharmony_ci{
4298c2ecf20Sopenharmony_ci	struct brd_device *brd;
4308c2ecf20Sopenharmony_ci
4318c2ecf20Sopenharmony_ci	*new = false;
4328c2ecf20Sopenharmony_ci	list_for_each_entry(brd, &brd_devices, brd_list) {
4338c2ecf20Sopenharmony_ci		if (brd->brd_number == i)
4348c2ecf20Sopenharmony_ci			goto out;
4358c2ecf20Sopenharmony_ci	}
4368c2ecf20Sopenharmony_ci
4378c2ecf20Sopenharmony_ci	brd = brd_alloc(i);
4388c2ecf20Sopenharmony_ci	if (brd) {
4398c2ecf20Sopenharmony_ci		brd->brd_disk->queue = brd->brd_queue;
4408c2ecf20Sopenharmony_ci		add_disk(brd->brd_disk);
4418c2ecf20Sopenharmony_ci		list_add_tail(&brd->brd_list, &brd_devices);
4428c2ecf20Sopenharmony_ci	}
4438c2ecf20Sopenharmony_ci	*new = true;
4448c2ecf20Sopenharmony_ciout:
4458c2ecf20Sopenharmony_ci	return brd;
4468c2ecf20Sopenharmony_ci}
4478c2ecf20Sopenharmony_ci
/*
 * Remove one registered device: unlink it from brd_devices, delete its
 * gendisk, then free it with brd_free().
 */
static void brd_del_one(struct brd_device *brd)
{
	list_del(&brd->brd_list);
	del_gendisk(brd->brd_disk);
	brd_free(brd);
}
4548c2ecf20Sopenharmony_ci
4558c2ecf20Sopenharmony_cistatic struct kobject *brd_probe(dev_t dev, int *part, void *data)
4568c2ecf20Sopenharmony_ci{
4578c2ecf20Sopenharmony_ci	struct brd_device *brd;
4588c2ecf20Sopenharmony_ci	struct kobject *kobj;
4598c2ecf20Sopenharmony_ci	bool new;
4608c2ecf20Sopenharmony_ci
4618c2ecf20Sopenharmony_ci	mutex_lock(&brd_devices_mutex);
4628c2ecf20Sopenharmony_ci	brd = brd_init_one(MINOR(dev) / max_part, &new);
4638c2ecf20Sopenharmony_ci	kobj = brd ? get_disk_and_module(brd->brd_disk) : NULL;
4648c2ecf20Sopenharmony_ci	mutex_unlock(&brd_devices_mutex);
4658c2ecf20Sopenharmony_ci
4668c2ecf20Sopenharmony_ci	if (new)
4678c2ecf20Sopenharmony_ci		*part = 0;
4688c2ecf20Sopenharmony_ci
4698c2ecf20Sopenharmony_ci	return kobj;
4708c2ecf20Sopenharmony_ci}
4718c2ecf20Sopenharmony_ci
4728c2ecf20Sopenharmony_cistatic inline void brd_check_and_reset_par(void)
4738c2ecf20Sopenharmony_ci{
4748c2ecf20Sopenharmony_ci	if (unlikely(!max_part))
4758c2ecf20Sopenharmony_ci		max_part = 1;
4768c2ecf20Sopenharmony_ci
4778c2ecf20Sopenharmony_ci	/*
4788c2ecf20Sopenharmony_ci	 * make sure 'max_part' can be divided exactly by (1U << MINORBITS),
4798c2ecf20Sopenharmony_ci	 * otherwise, it is possiable to get same dev_t when adding partitions.
4808c2ecf20Sopenharmony_ci	 */
4818c2ecf20Sopenharmony_ci	if ((1U << MINORBITS) % max_part != 0)
4828c2ecf20Sopenharmony_ci		max_part = 1UL << fls(max_part);
4838c2ecf20Sopenharmony_ci
4848c2ecf20Sopenharmony_ci	if (max_part > DISK_MAX_PARTS) {
4858c2ecf20Sopenharmony_ci		pr_info("brd: max_part can't be larger than %d, reset max_part = %d.\n",
4868c2ecf20Sopenharmony_ci			DISK_MAX_PARTS, DISK_MAX_PARTS);
4878c2ecf20Sopenharmony_ci		max_part = DISK_MAX_PARTS;
4888c2ecf20Sopenharmony_ci	}
4898c2ecf20Sopenharmony_ci}
4908c2ecf20Sopenharmony_ci
4918c2ecf20Sopenharmony_cistatic int __init brd_init(void)
4928c2ecf20Sopenharmony_ci{
4938c2ecf20Sopenharmony_ci	struct brd_device *brd, *next;
4948c2ecf20Sopenharmony_ci	int i;
4958c2ecf20Sopenharmony_ci
4968c2ecf20Sopenharmony_ci	/*
4978c2ecf20Sopenharmony_ci	 * brd module now has a feature to instantiate underlying device
4988c2ecf20Sopenharmony_ci	 * structure on-demand, provided that there is an access dev node.
4998c2ecf20Sopenharmony_ci	 *
5008c2ecf20Sopenharmony_ci	 * (1) if rd_nr is specified, create that many upfront. else
5018c2ecf20Sopenharmony_ci	 *     it defaults to CONFIG_BLK_DEV_RAM_COUNT
5028c2ecf20Sopenharmony_ci	 * (2) User can further extend brd devices by create dev node themselves
5038c2ecf20Sopenharmony_ci	 *     and have kernel automatically instantiate actual device
5048c2ecf20Sopenharmony_ci	 *     on-demand. Example:
5058c2ecf20Sopenharmony_ci	 *		mknod /path/devnod_name b 1 X	# 1 is the rd major
5068c2ecf20Sopenharmony_ci	 *		fdisk -l /path/devnod_name
5078c2ecf20Sopenharmony_ci	 *	If (X / max_part) was not already created it will be created
5088c2ecf20Sopenharmony_ci	 *	dynamically.
5098c2ecf20Sopenharmony_ci	 */
5108c2ecf20Sopenharmony_ci
5118c2ecf20Sopenharmony_ci	if (register_blkdev(RAMDISK_MAJOR, "ramdisk"))
5128c2ecf20Sopenharmony_ci		return -EIO;
5138c2ecf20Sopenharmony_ci
5148c2ecf20Sopenharmony_ci	brd_check_and_reset_par();
5158c2ecf20Sopenharmony_ci
5168c2ecf20Sopenharmony_ci	for (i = 0; i < rd_nr; i++) {
5178c2ecf20Sopenharmony_ci		brd = brd_alloc(i);
5188c2ecf20Sopenharmony_ci		if (!brd)
5198c2ecf20Sopenharmony_ci			goto out_free;
5208c2ecf20Sopenharmony_ci		list_add_tail(&brd->brd_list, &brd_devices);
5218c2ecf20Sopenharmony_ci	}
5228c2ecf20Sopenharmony_ci
5238c2ecf20Sopenharmony_ci	/* point of no return */
5248c2ecf20Sopenharmony_ci
5258c2ecf20Sopenharmony_ci	list_for_each_entry(brd, &brd_devices, brd_list) {
5268c2ecf20Sopenharmony_ci		/*
5278c2ecf20Sopenharmony_ci		 * associate with queue just before adding disk for
5288c2ecf20Sopenharmony_ci		 * avoiding to mess up failure path
5298c2ecf20Sopenharmony_ci		 */
5308c2ecf20Sopenharmony_ci		brd->brd_disk->queue = brd->brd_queue;
5318c2ecf20Sopenharmony_ci		add_disk(brd->brd_disk);
5328c2ecf20Sopenharmony_ci	}
5338c2ecf20Sopenharmony_ci
5348c2ecf20Sopenharmony_ci	blk_register_region(MKDEV(RAMDISK_MAJOR, 0), 1UL << MINORBITS,
5358c2ecf20Sopenharmony_ci				  THIS_MODULE, brd_probe, NULL, NULL);
5368c2ecf20Sopenharmony_ci
5378c2ecf20Sopenharmony_ci	pr_info("brd: module loaded\n");
5388c2ecf20Sopenharmony_ci	return 0;
5398c2ecf20Sopenharmony_ci
5408c2ecf20Sopenharmony_ciout_free:
5418c2ecf20Sopenharmony_ci	list_for_each_entry_safe(brd, next, &brd_devices, brd_list) {
5428c2ecf20Sopenharmony_ci		list_del(&brd->brd_list);
5438c2ecf20Sopenharmony_ci		brd_free(brd);
5448c2ecf20Sopenharmony_ci	}
5458c2ecf20Sopenharmony_ci	unregister_blkdev(RAMDISK_MAJOR, "ramdisk");
5468c2ecf20Sopenharmony_ci
5478c2ecf20Sopenharmony_ci	pr_info("brd: module NOT loaded !!!\n");
5488c2ecf20Sopenharmony_ci	return -ENOMEM;
5498c2ecf20Sopenharmony_ci}
5508c2ecf20Sopenharmony_ci
5518c2ecf20Sopenharmony_cistatic void __exit brd_exit(void)
5528c2ecf20Sopenharmony_ci{
5538c2ecf20Sopenharmony_ci	struct brd_device *brd, *next;
5548c2ecf20Sopenharmony_ci
5558c2ecf20Sopenharmony_ci	list_for_each_entry_safe(brd, next, &brd_devices, brd_list)
5568c2ecf20Sopenharmony_ci		brd_del_one(brd);
5578c2ecf20Sopenharmony_ci
5588c2ecf20Sopenharmony_ci	blk_unregister_region(MKDEV(RAMDISK_MAJOR, 0), 1UL << MINORBITS);
5598c2ecf20Sopenharmony_ci	unregister_blkdev(RAMDISK_MAJOR, "ramdisk");
5608c2ecf20Sopenharmony_ci
5618c2ecf20Sopenharmony_ci	pr_info("brd: module unloaded\n");
5628c2ecf20Sopenharmony_ci}
5638c2ecf20Sopenharmony_ci
/* Register brd_init()/brd_exit() as the module's entry and exit points. */
module_init(brd_init);
module_exit(brd_exit);
5668c2ecf20Sopenharmony_ci
567