/*
 * Compressed RAM block device
 *
 * Copyright (C) 2008, 2009, 2010 Nitin Gupta
 *               2012, 2013 Minchan Kim
 *
 * This code is released using a dual license strategy: BSD/GPL
 * You can choose the licence that better fits your requirements.
 *
 * Released under the terms of 3-clause BSD License
 * Released under the terms of GNU General Public License Version 2.0
 *
 */

#define KMSG_COMPONENT "zram"
#define pr_fmt(fmt) KMSG_COMPONENT ": " fmt

#include <linux/module.h>
#include <linux/kernel.h>
#include <linux/bio.h>
#include <linux/bitops.h>
#include <linux/blkdev.h>
#include <linux/buffer_head.h>
#include <linux/device.h>
#include <linux/highmem.h>
#include <linux/slab.h>
#include <linux/backing-dev.h>
#include <linux/string.h>
#include <linux/vmalloc.h>
#include <linux/err.h>
#include <linux/idr.h>
#include <linux/sysfs.h>
#include <linux/debugfs.h>
#include <linux/cpuhotplug.h>
#include <linux/part_stat.h>

#ifdef CONFIG_ZRAM_GROUP
#include <linux/memcontrol.h>
#endif

#include "zram_drv.h"

static DEFINE_IDR(zram_index_idr);
/* idr index must be protected */
static DEFINE_MUTEX(zram_index_mutex);

static int zram_major;
static const char *default_compressor = CONFIG_ZRAM_DEF_COMP;

/* Module params (documentation at end) */
static unsigned int num_devices = 1;
/*
 * Pages that compress to sizes equal to or greater than this are stored
 * uncompressed in memory.
 */
static size_t huge_class_size;

static const struct block_device_operations zram_devops;

static void zram_free_page(struct zram *zram, size_t index);
static int zram_read_page(struct zram *zram, struct page *page, u32 index,
			  struct bio *parent);

static inline bool init_done(struct zram *zram)
{
	return zram->disksize;
}

static inline struct zram *dev_to_zram(struct device *dev)
{
	return (struct zram *)dev_to_disk(dev)->private_data;
}

static inline void zram_set_element(struct zram *zram, u32 index,
				    unsigned long element)
{
	zram->table[index].element = element;
}

static unsigned long zram_get_element(struct zram *zram, u32 index)
{
	return zram->table[index].element;
}

static inline bool zram_allocated(struct zram *zram, u32 index)
{
	return zram_get_obj_size(zram, index) ||
			zram_test_flag(zram, index, ZRAM_SAME) ||
			zram_test_flag(zram, index, ZRAM_WB);
}

#if PAGE_SIZE != 4096
static inline bool is_partial_io(struct bio_vec *bvec)
{
	return bvec->bv_len != PAGE_SIZE;
}
#define ZRAM_PARTIAL_IO		1
#else
static inline bool is_partial_io(struct bio_vec *bvec)
{
	return false;
}
#endif

static inline void zram_set_priority(struct zram *zram, u32 index, u32 prio)
{
	prio &= ZRAM_COMP_PRIORITY_MASK;
	/*
	 * Clear the previous priority value first, in case we are
	 * recompressing an already recompressed page.
	 */
	zram->table[index].flags &= ~(ZRAM_COMP_PRIORITY_MASK <<
				      ZRAM_COMP_PRIORITY_BIT1);
	zram->table[index].flags |= (prio << ZRAM_COMP_PRIORITY_BIT1);
}

static inline u32 zram_get_priority(struct zram *zram, u32 index)
{
	u32 prio = zram->table[index].flags >> ZRAM_COMP_PRIORITY_BIT1;

	return prio & ZRAM_COMP_PRIORITY_MASK;
}

static inline void update_used_max(struct zram *zram,
				   const unsigned long pages)
{
	unsigned long cur_max = atomic_long_read(&zram->stats.max_used_pages);

	do {
		if (cur_max >= pages)
			return;
	} while (!atomic_long_try_cmpxchg(&zram->stats.max_used_pages,
					  &cur_max, pages));
}

static inline void zram_fill_page(void *ptr, unsigned long len,
				  unsigned long value)
{
	WARN_ON_ONCE(!IS_ALIGNED(len, sizeof(unsigned long)));
	memset_l(ptr, value, len / sizeof(unsigned long));
}

static bool page_same_filled(void *ptr, unsigned long *element)
{
	unsigned long *page;
	unsigned long val;
	unsigned int pos, last_pos = PAGE_SIZE / sizeof(*page) - 1;

	page = (unsigned long *)ptr;
	val = page[0];

	if (val != page[last_pos])
		return false;

	for (pos = 1; pos < last_pos; pos++) {
		if (val != page[pos])
			return false;
	}

	*element = val;

	return true;
}
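
/*
 * Note: page_same_filled() above detects pages whose every word equals the
 * first word (a zero page being the common case). Such pages are recorded as
 * a single value in table[index].element with ZRAM_SAME set, so no zsmalloc
 * allocation is needed for them.
 */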

static ssize_t initstate_show(struct device *dev,
		struct device_attribute *attr, char *buf)
{
	u32 val;
	struct zram *zram = dev_to_zram(dev);

	down_read(&zram->init_lock);
	val = init_done(zram);
	up_read(&zram->init_lock);

	return scnprintf(buf, PAGE_SIZE, "%u\n", val);
}

static ssize_t disksize_show(struct device *dev,
		struct device_attribute *attr, char *buf)
{
	struct zram *zram = dev_to_zram(dev);

	return scnprintf(buf, PAGE_SIZE, "%llu\n", zram->disksize);
}

static ssize_t mem_limit_store(struct device *dev,
		struct device_attribute *attr, const char *buf, size_t len)
{
	u64 limit;
	char *tmp;
	struct zram *zram = dev_to_zram(dev);

	limit = memparse(buf, &tmp);
	if (buf == tmp) /* no chars parsed, invalid input */
		return -EINVAL;

	down_write(&zram->init_lock);
	zram->limit_pages = PAGE_ALIGN(limit) >> PAGE_SHIFT;
	up_write(&zram->init_lock);

	return len;
}

static ssize_t mem_used_max_store(struct device *dev,
		struct device_attribute *attr, const char *buf, size_t len)
{
	int err;
	unsigned long val;
	struct zram *zram = dev_to_zram(dev);

	err = kstrtoul(buf, 10, &val);
	if (err || val != 0)
		return -EINVAL;

	down_read(&zram->init_lock);
	if (init_done(zram)) {
		atomic_long_set(&zram->stats.max_used_pages,
				zs_get_total_pages(zram->mem_pool));
	}
	up_read(&zram->init_lock);

	return len;
}

/*
 * Mark all pages which are older than or equal to cutoff as IDLE.
 * Callers should hold the zram init lock in read mode.
 */
static void mark_idle(struct zram *zram, ktime_t cutoff)
{
	int is_idle = 1;
	unsigned long nr_pages = zram->disksize >> PAGE_SHIFT;
	int index;

	for (index = 0; index < nr_pages; index++) {
		/*
		 * Do not mark ZRAM_UNDER_WB slot as ZRAM_IDLE to close race.
		 * See the comment in writeback_store.
		 */
		zram_slot_lock(zram, index);
		if (zram_allocated(zram, index) &&
		    !zram_test_flag(zram, index, ZRAM_UNDER_WB)) {
#ifdef CONFIG_ZRAM_MEMORY_TRACKING
			is_idle = !cutoff || ktime_after(cutoff, zram->table[index].ac_time);
#endif
			if (is_idle)
				zram_set_flag(zram, index, ZRAM_IDLE);
		}
		zram_slot_unlock(zram, index);
	}
}

static ssize_t idle_store(struct device *dev,
		struct device_attribute *attr, const char *buf, size_t len)
{
	struct zram *zram = dev_to_zram(dev);
	ktime_t cutoff_time = 0;
	ssize_t rv = -EINVAL;

	if (!sysfs_streq(buf, "all")) {
		/*
		 * If it did not parse as 'all', try to treat it as an
		 * integer when we have memory tracking enabled.
		 */
		u64 age_sec;

		if (IS_ENABLED(CONFIG_ZRAM_MEMORY_TRACKING) && !kstrtoull(buf, 0, &age_sec))
			cutoff_time = ktime_sub(ktime_get_boottime(),
					ns_to_ktime(age_sec * NSEC_PER_SEC));
		else
			goto out;
	}

	down_read(&zram->init_lock);
	if (!init_done(zram))
		goto out_unlock;

	/*
	 * A cutoff_time of 0 marks everything as idle, this is the
	 * "all" behavior.
	 */
	mark_idle(zram, cutoff_time);
	rv = len;

out_unlock:
	up_read(&zram->init_lock);
out:
	return rv;
}

#ifdef CONFIG_ZRAM_WRITEBACK
static ssize_t writeback_limit_enable_store(struct device *dev,
		struct device_attribute *attr, const char *buf, size_t len)
{
	struct zram *zram = dev_to_zram(dev);
	u64 val;
	ssize_t ret = -EINVAL;

	if (kstrtoull(buf, 10, &val))
		return ret;

	down_read(&zram->init_lock);
	spin_lock(&zram->wb_limit_lock);
	zram->wb_limit_enable = val;
	spin_unlock(&zram->wb_limit_lock);
	up_read(&zram->init_lock);
	ret = len;

	return ret;
}

static ssize_t writeback_limit_enable_show(struct device *dev,
		struct device_attribute *attr, char *buf)
{
	bool val;
	struct zram *zram = dev_to_zram(dev);

	down_read(&zram->init_lock);
	spin_lock(&zram->wb_limit_lock);
	val = zram->wb_limit_enable;
	spin_unlock(&zram->wb_limit_lock);
	up_read(&zram->init_lock);

	return scnprintf(buf, PAGE_SIZE, "%d\n", val);
}

static ssize_t writeback_limit_store(struct device *dev,
		struct device_attribute *attr, const char *buf, size_t len)
{
	struct zram *zram = dev_to_zram(dev);
	u64 val;
	ssize_t ret = -EINVAL;

	if (kstrtoull(buf, 10, &val))
		return ret;

	down_read(&zram->init_lock);
	spin_lock(&zram->wb_limit_lock);
	zram->bd_wb_limit = val;
	spin_unlock(&zram->wb_limit_lock);
	up_read(&zram->init_lock);
	ret = len;

	return ret;
}

static ssize_t writeback_limit_show(struct device *dev,
		struct device_attribute *attr, char *buf)
{
	u64 val;
	struct zram *zram = dev_to_zram(dev);

	down_read(&zram->init_lock);
	spin_lock(&zram->wb_limit_lock);
	val = zram->bd_wb_limit;
	spin_unlock(&zram->wb_limit_lock);
	up_read(&zram->init_lock);

	return scnprintf(buf, PAGE_SIZE, "%llu\n", val);
}

static void reset_bdev(struct zram *zram)
{
	struct block_device *bdev;

	if (!zram->backing_dev)
		return;

	bdev = zram->bdev;
	blkdev_put(bdev, zram);
	/* hope filp_close flushes all of the IO */
	filp_close(zram->backing_dev, NULL);
	zram->backing_dev = NULL;
	zram->bdev = NULL;
	zram->disk->fops = &zram_devops;
	kvfree(zram->bitmap);
	zram->bitmap = NULL;
}

static ssize_t backing_dev_show(struct device *dev,
		struct device_attribute *attr, char *buf)
{
	struct file *file;
	struct zram *zram = dev_to_zram(dev);
	char *p;
	ssize_t ret;

	down_read(&zram->init_lock);
	file = zram->backing_dev;
	if (!file) {
		memcpy(buf, "none\n", 5);
		up_read(&zram->init_lock);
		return 5;
	}

	p = file_path(file, buf, PAGE_SIZE - 1);
	if (IS_ERR(p)) {
		ret = PTR_ERR(p);
		goto out;
	}

	ret = strlen(p);
	memmove(buf, p, ret);
	buf[ret++] = '\n';
out:
	up_read(&zram->init_lock);
	return ret;
}

static ssize_t backing_dev_store(struct device *dev,
		struct device_attribute *attr, const char *buf, size_t len)
{
	char *file_name;
	size_t sz;
	struct file *backing_dev = NULL;
	struct inode *inode;
	struct address_space *mapping;
	unsigned int bitmap_sz;
	unsigned long nr_pages, *bitmap = NULL;
	struct block_device *bdev = NULL;
	int err;
	struct zram *zram = dev_to_zram(dev);

	file_name = kmalloc(PATH_MAX, GFP_KERNEL);
	if (!file_name)
		return -ENOMEM;

	down_write(&zram->init_lock);
	if (init_done(zram)) {
		pr_info("Can't setup backing device for initialized device\n");
		err = -EBUSY;
		goto out;
	}

	strscpy(file_name, buf, PATH_MAX);
	/* ignore trailing newline */
	sz = strlen(file_name);
	if (sz > 0 && file_name[sz - 1] == '\n')
		file_name[sz - 1] = 0x00;

	backing_dev = filp_open(file_name, O_RDWR|O_LARGEFILE, 0);
	if (IS_ERR(backing_dev)) {
		err = PTR_ERR(backing_dev);
		backing_dev = NULL;
		goto out;
	}

	mapping = backing_dev->f_mapping;
	inode = mapping->host;

	/* Only block devices are supported at the moment */
	if (!S_ISBLK(inode->i_mode)) {
		err = -ENOTBLK;
		goto out;
	}

	bdev = blkdev_get_by_dev(inode->i_rdev, BLK_OPEN_READ | BLK_OPEN_WRITE,
				 zram, NULL);
	if (IS_ERR(bdev)) {
		err = PTR_ERR(bdev);
		bdev = NULL;
		goto out;
	}

	nr_pages = i_size_read(inode) >> PAGE_SHIFT;
	bitmap_sz = BITS_TO_LONGS(nr_pages) * sizeof(long);
	bitmap = kvzalloc(bitmap_sz, GFP_KERNEL);
	if (!bitmap) {
		err = -ENOMEM;
		goto out;
	}

	reset_bdev(zram);

	zram->bdev = bdev;
	zram->backing_dev = backing_dev;
	zram->bitmap = bitmap;
	zram->nr_pages = nr_pages;
	up_write(&zram->init_lock);

	pr_info("setup backing device %s\n", file_name);
	kfree(file_name);

	return len;
out:
	kvfree(bitmap);

	if (bdev)
		blkdev_put(bdev, zram);

	if (backing_dev)
		filp_close(backing_dev, NULL);

	up_write(&zram->init_lock);

	kfree(file_name);

	return err;
}

static unsigned long alloc_block_bdev(struct zram *zram)
{
	unsigned long blk_idx = 1;
retry:
	/* skip bit 0 to avoid confusion with zram.handle == 0 */
	blk_idx = find_next_zero_bit(zram->bitmap, zram->nr_pages, blk_idx);
	if (blk_idx == zram->nr_pages)
		return 0;

	if (test_and_set_bit(blk_idx, zram->bitmap))
		goto retry;

	atomic64_inc(&zram->stats.bd_count);
	return blk_idx;
}

static void free_block_bdev(struct zram *zram, unsigned long blk_idx)
{
	int was_set;

	was_set = test_and_clear_bit(blk_idx, zram->bitmap);
	WARN_ON_ONCE(!was_set);
	atomic64_dec(&zram->stats.bd_count);
}

static void read_from_bdev_async(struct zram *zram, struct page *page,
			unsigned long entry, struct bio *parent)
{
	struct bio *bio;

	bio = bio_alloc(zram->bdev, 1, parent->bi_opf, GFP_NOIO);
	bio->bi_iter.bi_sector = entry * (PAGE_SIZE >> 9);
	__bio_add_page(bio, page, PAGE_SIZE, 0);
	bio_chain(bio, parent);
	submit_bio(bio);
}

#define HUGE_WRITEBACK			(1<<0)
#define IDLE_WRITEBACK			(1<<1)
#define INCOMPRESSIBLE_WRITEBACK	(1<<2)
static ssize_t writeback_store(struct device *dev,
		struct device_attribute *attr, const char *buf, size_t len)
{
	struct zram *zram = dev_to_zram(dev);
	unsigned long nr_pages = zram->disksize >> PAGE_SHIFT;
	unsigned long index = 0;
	struct bio bio;
	struct bio_vec bio_vec;
	struct page *page;
	ssize_t ret = len;
	int mode, err;
	unsigned long blk_idx = 0;

	if (sysfs_streq(buf, "idle"))
		mode = IDLE_WRITEBACK;
	else if (sysfs_streq(buf, "huge"))
		mode = HUGE_WRITEBACK;
	else if (sysfs_streq(buf, "huge_idle"))
		mode = IDLE_WRITEBACK | HUGE_WRITEBACK;
	else if (sysfs_streq(buf, "incompressible"))
		mode = INCOMPRESSIBLE_WRITEBACK;
	else
		return -EINVAL;

	down_read(&zram->init_lock);
	if (!init_done(zram)) {
		ret = -EINVAL;
		goto release_init_lock;
	}

	if (!zram->backing_dev) {
		ret = -ENODEV;
		goto release_init_lock;
	}

	page = alloc_page(GFP_KERNEL);
	if (!page) {
		ret = -ENOMEM;
		goto release_init_lock;
	}

	for (index = 0; index < nr_pages; index++) {
		spin_lock(&zram->wb_limit_lock);
		if (zram->wb_limit_enable && !zram->bd_wb_limit) {
			spin_unlock(&zram->wb_limit_lock);
			ret = -EIO;
			break;
		}
		spin_unlock(&zram->wb_limit_lock);

		if (!blk_idx) {
			blk_idx = alloc_block_bdev(zram);
			if (!blk_idx) {
				ret = -ENOSPC;
				break;
			}
		}

		zram_slot_lock(zram, index);
		if (!zram_allocated(zram, index))
			goto next;

		if (zram_test_flag(zram, index, ZRAM_WB) ||
		    zram_test_flag(zram, index, ZRAM_SAME) ||
		    zram_test_flag(zram, index, ZRAM_UNDER_WB))
			goto next;

		if (mode & IDLE_WRITEBACK &&
		    !zram_test_flag(zram, index, ZRAM_IDLE))
			goto next;
		if (mode & HUGE_WRITEBACK &&
		    !zram_test_flag(zram, index, ZRAM_HUGE))
			goto next;
		if (mode & INCOMPRESSIBLE_WRITEBACK &&
		    !zram_test_flag(zram, index, ZRAM_INCOMPRESSIBLE))
			goto next;

		/*
		 * Clearing ZRAM_UNDER_WB is the duty of the caller.
		 * IOW, zram_free_page never clears it.
		 */
		zram_set_flag(zram, index, ZRAM_UNDER_WB);
		/* Need for hugepage writeback racing */
		zram_set_flag(zram, index, ZRAM_IDLE);
		zram_slot_unlock(zram, index);
		if (zram_read_page(zram, page, index, NULL)) {
			zram_slot_lock(zram, index);
			zram_clear_flag(zram, index, ZRAM_UNDER_WB);
			zram_clear_flag(zram, index, ZRAM_IDLE);
			zram_slot_unlock(zram, index);
			continue;
		}

		bio_init(&bio, zram->bdev, &bio_vec, 1,
			 REQ_OP_WRITE | REQ_SYNC);
		bio.bi_iter.bi_sector = blk_idx * (PAGE_SIZE >> 9);
		__bio_add_page(&bio, page, PAGE_SIZE, 0);

		/*
		 * XXX: A single page IO would be inefficient for write
		 * but it would be not bad as starter.
		 */
		err = submit_bio_wait(&bio);
		if (err) {
			zram_slot_lock(zram, index);
			zram_clear_flag(zram, index, ZRAM_UNDER_WB);
			zram_clear_flag(zram, index, ZRAM_IDLE);
			zram_slot_unlock(zram, index);
			/*
			 * BIO errors are not fatal, we continue and simply
			 * attempt to writeback the remaining objects (pages).
			 * At the same time we need to signal user-space that
			 * some writes (at least one, but also could be all of
			 * them) were not successful and we do so by returning
			 * the most recent BIO error.
			 */
			ret = err;
			continue;
		}

		atomic64_inc(&zram->stats.bd_writes);
		/*
		 * We released zram_slot_lock so need to check if the slot was
		 * changed. If there is freeing for the slot, we can catch it
		 * easily by zram_allocated.
		 * A subtle case is the slot is freed/reallocated/marked as
		 * ZRAM_IDLE again. To close the race, idle_store doesn't
		 * mark ZRAM_IDLE once it found the slot was ZRAM_UNDER_WB.
		 * Thus, we could close the race by checking ZRAM_IDLE bit.
		 */
		zram_slot_lock(zram, index);
		if (!zram_allocated(zram, index) ||
		    !zram_test_flag(zram, index, ZRAM_IDLE)) {
			zram_clear_flag(zram, index, ZRAM_UNDER_WB);
			zram_clear_flag(zram, index, ZRAM_IDLE);
			goto next;
		}

		zram_free_page(zram, index);
		zram_clear_flag(zram, index, ZRAM_UNDER_WB);
		zram_set_flag(zram, index, ZRAM_WB);
		zram_set_element(zram, index, blk_idx);
		blk_idx = 0;
		atomic64_inc(&zram->stats.pages_stored);
		spin_lock(&zram->wb_limit_lock);
		if (zram->wb_limit_enable && zram->bd_wb_limit > 0)
			zram->bd_wb_limit -= 1UL << (PAGE_SHIFT - 12);
		spin_unlock(&zram->wb_limit_lock);
next:
		zram_slot_unlock(zram, index);
	}

	if (blk_idx)
		free_block_bdev(zram, blk_idx);
	__free_page(page);
release_init_lock:
	up_read(&zram->init_lock);

	return ret;
}

struct zram_work {
	struct work_struct work;
	struct zram *zram;
	unsigned long entry;
	struct page *page;
	int error;
};

static void zram_sync_read(struct work_struct *work)
{
	struct zram_work *zw = container_of(work, struct zram_work, work);
	struct bio_vec bv;
	struct bio bio;

	bio_init(&bio, zw->zram->bdev, &bv, 1, REQ_OP_READ);
	bio.bi_iter.bi_sector = zw->entry * (PAGE_SIZE >> 9);
	__bio_add_page(&bio, zw->page, PAGE_SIZE, 0);
	zw->error = submit_bio_wait(&bio);
}

/*
 * The block layer wants one ->submit_bio to be active at a time, so if we
 * use chained IO with the parent IO in the same context, it deadlocks. To
 * avoid that, use a worker thread context.
 */
static int read_from_bdev_sync(struct zram *zram, struct page *page,
				unsigned long entry)
{
	struct zram_work work;

	work.page = page;
	work.zram = zram;
	work.entry = entry;

	INIT_WORK_ONSTACK(&work.work, zram_sync_read);
	queue_work(system_unbound_wq, &work.work);
	flush_work(&work.work);
	destroy_work_on_stack(&work.work);

	return work.error;
}

static int read_from_bdev(struct zram *zram, struct page *page,
			unsigned long entry, struct bio *parent)
{
	atomic64_inc(&zram->stats.bd_reads);
	if (!parent) {
		if (WARN_ON_ONCE(!IS_ENABLED(ZRAM_PARTIAL_IO)))
			return -EIO;
		return read_from_bdev_sync(zram, page, entry);
	}
	read_from_bdev_async(zram, page, entry, parent);
	return 0;
}
#else
static inline void reset_bdev(struct zram *zram) {};
static int read_from_bdev(struct zram *zram, struct page *page,
			unsigned long entry, struct bio *parent)
{
	return -EIO;
}

static void free_block_bdev(struct zram *zram, unsigned long blk_idx) {};
#endif

#ifdef CONFIG_ZRAM_MEMORY_TRACKING

static struct dentry *zram_debugfs_root;

static void zram_debugfs_create(void)
{
	zram_debugfs_root = debugfs_create_dir("zram", NULL);
}

static void zram_debugfs_destroy(void)
{
	debugfs_remove_recursive(zram_debugfs_root);
}

static void zram_accessed(struct zram *zram, u32 index)
{
	zram_clear_flag(zram, index, ZRAM_IDLE);
	zram->table[index].ac_time = ktime_get_boottime();
}
static ssize_t read_block_state(struct file *file, char __user *buf,
				size_t count, loff_t *ppos)
{
	char *kbuf;
	ssize_t index, written = 0;
	struct zram *zram = file->private_data;
	unsigned long nr_pages = zram->disksize >> PAGE_SHIFT;
	struct timespec64 ts;

	kbuf = kvmalloc(count, GFP_KERNEL);
	if (!kbuf)
		return -ENOMEM;

	down_read(&zram->init_lock);
	if (!init_done(zram)) {
		up_read(&zram->init_lock);
		kvfree(kbuf);
		return -EINVAL;
	}

	for (index = *ppos; index < nr_pages; index++) {
		int copied;

		zram_slot_lock(zram, index);
		if (!zram_allocated(zram, index))
			goto next;

		ts = ktime_to_timespec64(zram->table[index].ac_time);
		copied = snprintf(kbuf + written, count,
			"%12zd %12lld.%06lu %c%c%c%c%c%c\n",
			index, (s64)ts.tv_sec,
			ts.tv_nsec / NSEC_PER_USEC,
			zram_test_flag(zram, index, ZRAM_SAME) ? 's' : '.',
			zram_test_flag(zram, index, ZRAM_WB) ? 'w' : '.',
			zram_test_flag(zram, index, ZRAM_HUGE) ? 'h' : '.',
			zram_test_flag(zram, index, ZRAM_IDLE) ? 'i' : '.',
			zram_get_priority(zram, index) ? 'r' : '.',
			zram_test_flag(zram, index,
				       ZRAM_INCOMPRESSIBLE) ? 'n' : '.');

		if (count <= copied) {
			zram_slot_unlock(zram, index);
			break;
		}
		written += copied;
		count -= copied;
next:
		zram_slot_unlock(zram, index);
		*ppos += 1;
	}

	up_read(&zram->init_lock);
	if (copy_to_user(buf, kbuf, written))
		written = -EFAULT;
	kvfree(kbuf);

	return written;
}
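
/*
 * Each block_state line emitted above is roughly (illustrative):
 *   <index> <access-time sec.usec> <flags>
 * where the flag characters are 's' (same-filled), 'w' (written back),
 * 'h' (huge), 'i' (idle), 'r' (recompressed, i.e. non-zero priority) and
 * 'n' (incompressible), with '.' printed for flags that are not set.
 */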

static const struct file_operations proc_zram_block_state_op = {
	.open = simple_open,
	.read = read_block_state,
	.llseek = default_llseek,
};

static void zram_debugfs_register(struct zram *zram)
{
	if (!zram_debugfs_root)
		return;

	zram->debugfs_dir = debugfs_create_dir(zram->disk->disk_name,
						zram_debugfs_root);
	debugfs_create_file("block_state", 0400, zram->debugfs_dir,
				zram, &proc_zram_block_state_op);
}

static void zram_debugfs_unregister(struct zram *zram)
{
	debugfs_remove_recursive(zram->debugfs_dir);
}
#else
static void zram_debugfs_create(void) {};
static void zram_debugfs_destroy(void) {};
static void zram_accessed(struct zram *zram, u32 index)
{
	zram_clear_flag(zram, index, ZRAM_IDLE);
};
static void zram_debugfs_register(struct zram *zram) {};
static void zram_debugfs_unregister(struct zram *zram) {};
#endif

/*
 * We switched to per-cpu streams and this attr is not needed anymore.
 * However, we will keep it around for some time, because:
 * a) we may revert per-cpu streams in the future
 * b) it's visible to user space and we need to follow our 2 years
 *    retirement rule; but we already have a number of 'soon to be
 *    altered' attrs, so max_comp_streams needs to wait for the next
 *    layoff cycle.
 */
static ssize_t max_comp_streams_show(struct device *dev,
		struct device_attribute *attr, char *buf)
{
	return scnprintf(buf, PAGE_SIZE, "%d\n", num_online_cpus());
}

static ssize_t max_comp_streams_store(struct device *dev,
		struct device_attribute *attr, const char *buf, size_t len)
{
	return len;
}

static void comp_algorithm_set(struct zram *zram, u32 prio, const char *alg)
{
	/* Do not free statically defined compression algorithms */
	if (zram->comp_algs[prio] != default_compressor)
		kfree(zram->comp_algs[prio]);

	zram->comp_algs[prio] = alg;
}

static ssize_t __comp_algorithm_show(struct zram *zram, u32 prio, char *buf)
{
	ssize_t sz;

	down_read(&zram->init_lock);
	sz = zcomp_available_show(zram->comp_algs[prio], buf);
	up_read(&zram->init_lock);

	return sz;
}

static int __comp_algorithm_store(struct zram *zram, u32 prio, const char *buf)
{
	char *compressor;
	size_t sz;

	sz = strlen(buf);
	if (sz >= CRYPTO_MAX_ALG_NAME)
		return -E2BIG;

	compressor = kstrdup(buf, GFP_KERNEL);
	if (!compressor)
		return -ENOMEM;

	/* ignore trailing newline */
	if (sz > 0 && compressor[sz - 1] == '\n')
		compressor[sz - 1] = 0x00;

	if (!zcomp_available_algorithm(compressor)) {
		kfree(compressor);
		return -EINVAL;
	}

	down_write(&zram->init_lock);
	if (init_done(zram)) {
		up_write(&zram->init_lock);
		kfree(compressor);
		pr_info("Can't change algorithm for initialized device\n");
		return -EBUSY;
	}

	comp_algorithm_set(zram, prio, compressor);
	up_write(&zram->init_lock);
	return 0;
}

static ssize_t comp_algorithm_show(struct device *dev,
				   struct device_attribute *attr,
				   char *buf)
{
	struct zram *zram = dev_to_zram(dev);

	return __comp_algorithm_show(zram, ZRAM_PRIMARY_COMP, buf);
}

static ssize_t comp_algorithm_store(struct device *dev,
				    struct device_attribute *attr,
				    const char *buf,
				    size_t len)
{
	struct zram *zram = dev_to_zram(dev);
	int ret;

	ret = __comp_algorithm_store(zram, ZRAM_PRIMARY_COMP, buf);
	return ret ? ret : len;
}

#ifdef CONFIG_ZRAM_MULTI_COMP
static ssize_t recomp_algorithm_show(struct device *dev,
				     struct device_attribute *attr,
				     char *buf)
{
	struct zram *zram = dev_to_zram(dev);
	ssize_t sz = 0;
	u32 prio;

	for (prio = ZRAM_SECONDARY_COMP; prio < ZRAM_MAX_COMPS; prio++) {
		if (!zram->comp_algs[prio])
			continue;

		sz += scnprintf(buf + sz, PAGE_SIZE - sz - 2, "#%d: ", prio);
		sz += __comp_algorithm_show(zram, prio, buf + sz);
	}

	return sz;
}

static ssize_t recomp_algorithm_store(struct device *dev,
				      struct device_attribute *attr,
				      const char *buf,
				      size_t len)
{
	struct zram *zram = dev_to_zram(dev);
	int prio = ZRAM_SECONDARY_COMP;
	char *args, *param, *val;
	char *alg = NULL;
	int ret;

	args = skip_spaces(buf);
	while (*args) {
		args = next_arg(args, &param, &val);

		if (!val || !*val)
			return -EINVAL;

		if (!strcmp(param, "algo")) {
			alg = val;
			continue;
		}

		if (!strcmp(param, "priority")) {
			ret = kstrtoint(val, 10, &prio);
			if (ret)
				return ret;
			continue;
		}
	}

	if (!alg)
		return -EINVAL;

	if (prio < ZRAM_SECONDARY_COMP || prio >= ZRAM_MAX_COMPS)
		return -EINVAL;

	ret = __comp_algorithm_store(zram, prio, alg);
	return ret ? ret : len;
}
#endif

static ssize_t compact_store(struct device *dev,
		struct device_attribute *attr, const char *buf, size_t len)
{
	struct zram *zram = dev_to_zram(dev);

	down_read(&zram->init_lock);
	if (!init_done(zram)) {
		up_read(&zram->init_lock);
		return -EINVAL;
	}

	zs_compact(zram->mem_pool);
	up_read(&zram->init_lock);

	return len;
}

static ssize_t io_stat_show(struct device *dev,
		struct device_attribute *attr, char *buf)
{
	struct zram *zram = dev_to_zram(dev);
	ssize_t ret;

	down_read(&zram->init_lock);
	ret = scnprintf(buf, PAGE_SIZE,
			"%8llu %8llu 0 %8llu\n",
			(u64)atomic64_read(&zram->stats.failed_reads),
			(u64)atomic64_read(&zram->stats.failed_writes),
			(u64)atomic64_read(&zram->stats.notify_free));
	up_read(&zram->init_lock);

	return ret;
}

static ssize_t mm_stat_show(struct device *dev,
		struct device_attribute *attr, char *buf)
{
	struct zram *zram = dev_to_zram(dev);
	struct zs_pool_stats pool_stats;
	u64 orig_size, mem_used = 0;
	long max_used;
	ssize_t ret;

	memset(&pool_stats, 0x00, sizeof(struct zs_pool_stats));

	down_read(&zram->init_lock);
	if (init_done(zram)) {
		mem_used = zs_get_total_pages(zram->mem_pool);
		zs_pool_stats(zram->mem_pool, &pool_stats);
	}

	orig_size = atomic64_read(&zram->stats.pages_stored);
	max_used = atomic_long_read(&zram->stats.max_used_pages);

	ret = scnprintf(buf, PAGE_SIZE,
			"%8llu %8llu %8llu %8lu %8ld %8llu %8lu %8llu %8llu\n",
			orig_size << PAGE_SHIFT,
			(u64)atomic64_read(&zram->stats.compr_data_size),
			mem_used << PAGE_SHIFT,
			zram->limit_pages << PAGE_SHIFT,
			max_used << PAGE_SHIFT,
			(u64)atomic64_read(&zram->stats.same_pages),
			atomic_long_read(&pool_stats.pages_compacted),
			(u64)atomic64_read(&zram->stats.huge_pages),
			(u64)atomic64_read(&zram->stats.huge_pages_since));
	up_read(&zram->init_lock);

	return ret;
}
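
/*
 * mm_stat columns, in the print order used above (names as commonly
 * documented for zram, listed here for convenience): orig_data_size,
 * compr_data_size, mem_used_total, mem_limit, mem_used_max, same_pages,
 * pages_compacted, huge_pages, huge_pages_since. Page counters are shifted
 * to bytes before printing.
 */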

#ifdef CONFIG_ZRAM_WRITEBACK
#define FOUR_K(x) ((x) * (1 << (PAGE_SHIFT - 12)))
static ssize_t bd_stat_show(struct device *dev,
		struct device_attribute *attr, char *buf)
{
	struct zram *zram = dev_to_zram(dev);
	ssize_t ret;

	down_read(&zram->init_lock);
	ret = scnprintf(buf, PAGE_SIZE,
		"%8llu %8llu %8llu\n",
			FOUR_K((u64)atomic64_read(&zram->stats.bd_count)),
			FOUR_K((u64)atomic64_read(&zram->stats.bd_reads)),
			FOUR_K((u64)atomic64_read(&zram->stats.bd_writes)));
	up_read(&zram->init_lock);

	return ret;
}
#endif

static ssize_t debug_stat_show(struct device *dev,
		struct device_attribute *attr, char *buf)
{
	int version = 1;
	struct zram *zram = dev_to_zram(dev);
	ssize_t ret;

	down_read(&zram->init_lock);
	ret = scnprintf(buf, PAGE_SIZE,
			"version: %d\n%8llu %8llu\n",
			version,
			(u64)atomic64_read(&zram->stats.writestall),
			(u64)atomic64_read(&zram->stats.miss_free));
	up_read(&zram->init_lock);

	return ret;
}

static DEVICE_ATTR_RO(io_stat);
static DEVICE_ATTR_RO(mm_stat);
#ifdef CONFIG_ZRAM_WRITEBACK
static DEVICE_ATTR_RO(bd_stat);
#endif
static DEVICE_ATTR_RO(debug_stat);

#ifdef CONFIG_ZRAM_GROUP
static ssize_t group_show(struct device *dev, struct device_attribute *attr, char *buf)
{
	struct zram *zram = dev_to_zram(dev);
	int ret = 0;

	down_read(&zram->init_lock);
	if (zram->zgrp_ctrl == ZGRP_NONE)
		ret = snprintf(buf, PAGE_SIZE - 1, "disable\n");
	else if (zram->zgrp_ctrl == ZGRP_TRACK)
		ret = snprintf(buf, PAGE_SIZE - 1, "readonly\n");
#ifdef CONFIG_ZRAM_GROUP_WRITEBACK
	else if (zram->zgrp_ctrl == ZGRP_WRITE)
		ret = snprintf(buf, PAGE_SIZE - 1, "readwrite\n");
#endif
	up_read(&zram->init_lock);

	return ret;
}

static ssize_t group_store(struct device *dev, struct device_attribute *attr,
			   const char *buf, size_t len)
{
	struct zram *zram = dev_to_zram(dev);
	int ret;
#ifdef CONFIG_ZRAM_GROUP_DEBUG
	u32 op, gid, index;

	ret = sscanf(buf, "%u %u %u", &op, &index, &gid);
	if (ret == 3) {
		pr_info("op[%u] index[%u] gid[%u].\n", op, index, gid);
		group_debug(zram, op, index, gid);
		return len;
	}
#endif

	ret = len;
	down_write(&zram->init_lock);
	if (init_done(zram)) {
		pr_info("Can't setup group ctrl for initialized device!\n");
		ret = -EBUSY;
		goto out;
	}
	if (!strcmp(buf, "disable\n"))
		zram->zgrp_ctrl = ZGRP_NONE;
	else if (!strcmp(buf, "readonly\n"))
		zram->zgrp_ctrl = ZGRP_TRACK;
#ifdef CONFIG_ZRAM_GROUP_WRITEBACK
	else if (!strcmp(buf, "readwrite\n"))
		zram->zgrp_ctrl = ZGRP_WRITE;
#endif
	else
		ret = -EINVAL;
out:
	up_write(&zram->init_lock);

	return ret;
}
#endif

static void zram_meta_free(struct zram *zram, u64 disksize)
{
	size_t num_pages = disksize >> PAGE_SHIFT;
	size_t index;

	/* Free all pages that are still in this zram device */
	for (index = 0; index < num_pages; index++)
		zram_free_page(zram, index);

	zs_destroy_pool(zram->mem_pool);
	vfree(zram->table);
#ifdef CONFIG_ZRAM_GROUP
	zram_group_deinit(zram);
#endif
}

static bool zram_meta_alloc(struct zram *zram, u64 disksize)
{
	size_t num_pages;

	num_pages = disksize >> PAGE_SHIFT;
	zram->table = vzalloc(array_size(num_pages, sizeof(*zram->table)));
	if (!zram->table)
		return false;

	zram->mem_pool = zs_create_pool(zram->disk->disk_name);
	if (!zram->mem_pool) {
		vfree(zram->table);
		return false;
	}

	if (!huge_class_size)
		huge_class_size = zs_huge_class_size(zram->mem_pool);
#ifdef CONFIG_ZRAM_GROUP
	zram_group_init(zram, num_pages);
#endif

	return true;
}

/*
 * To protect concurrent access to the same index entry, the caller should
 * hold this table index entry's bit_spinlock to indicate that the entry is
 * being accessed.
 */
static void zram_free_page(struct zram *zram, size_t index)
{
	unsigned long handle;

#ifdef CONFIG_ZRAM_GROUP
	zram_group_untrack_obj(zram, index);
#endif

#ifdef CONFIG_ZRAM_MEMORY_TRACKING
	zram->table[index].ac_time = 0;
#endif
	if (zram_test_flag(zram, index, ZRAM_IDLE))
		zram_clear_flag(zram, index, ZRAM_IDLE);

	if (zram_test_flag(zram, index, ZRAM_HUGE)) {
		zram_clear_flag(zram, index, ZRAM_HUGE);
		atomic64_dec(&zram->stats.huge_pages);
	}

	if (zram_test_flag(zram, index, ZRAM_INCOMPRESSIBLE))
		zram_clear_flag(zram, index, ZRAM_INCOMPRESSIBLE);

	zram_set_priority(zram, index, 0);

	if (zram_test_flag(zram, index, ZRAM_WB)) {
		zram_clear_flag(zram, index, ZRAM_WB);
		free_block_bdev(zram, zram_get_element(zram, index));
		goto out;
	}

	/*
	 * No memory is allocated for same element filled pages.
	 * Simply clear same page flag.
	 */
	if (zram_test_flag(zram, index, ZRAM_SAME)) {
		zram_clear_flag(zram, index, ZRAM_SAME);
		atomic64_dec(&zram->stats.same_pages);
		goto out;
	}

	handle = zram_get_handle(zram, index);
	if (!handle)
		return;

	zs_free(zram->mem_pool, handle);

	atomic64_sub(zram_get_obj_size(zram, index),
		     &zram->stats.compr_data_size);
out:
	atomic64_dec(&zram->stats.pages_stored);
	zram_set_handle(zram, index, 0);
	zram_set_obj_size(zram, index, 0);
	WARN_ON_ONCE(zram->table[index].flags &
		~(1UL << ZRAM_LOCK | 1UL << ZRAM_UNDER_WB));
}

/*
 * Reads (decompresses if needed) a page from zspool (zsmalloc).
 * Corresponding ZRAM slot should be locked.
 */
static int zram_read_from_zspool(struct zram *zram, struct page *page,
				 u32 index)
{
	struct zcomp_strm *zstrm;
	unsigned long handle;
	unsigned int size;
	void *src, *dst;
	u32 prio;
	int ret;

	handle = zram_get_handle(zram, index);
	if (!handle || zram_test_flag(zram, index, ZRAM_SAME)) {
		unsigned long value;
		void *mem;

		value = handle ? zram_get_element(zram, index) : 0;
		mem = kmap_atomic(page);
		zram_fill_page(mem, PAGE_SIZE, value);
		kunmap_atomic(mem);
		return 0;
	}

	size = zram_get_obj_size(zram, index);

	if (size != PAGE_SIZE) {
		prio = zram_get_priority(zram, index);
		zstrm = zcomp_stream_get(zram->comps[prio]);
	}

	src = zs_map_object(zram->mem_pool, handle, ZS_MM_RO);
	if (size == PAGE_SIZE) {
		dst = kmap_atomic(page);
		memcpy(dst, src, PAGE_SIZE);
		kunmap_atomic(dst);
		ret = 0;
	} else {
		dst = kmap_atomic(page);
		ret = zcomp_decompress(zstrm, src, size, dst);
		kunmap_atomic(dst);
		zcomp_stream_put(zram->comps[prio]);
	}
	zs_unmap_object(zram->mem_pool, handle);
	return ret;
}

static int zram_read_page(struct zram *zram, struct page *page, u32 index,
			  struct bio *parent)
{
	int ret;

	zram_slot_lock(zram, index);
#ifdef CONFIG_ZRAM_GROUP_WRITEBACK
	if (!parent) {
		ret = zram_group_fault_obj(zram, index);
		if (ret) {
			zram_slot_unlock(zram, index);
			return ret;
		}
	}

	if (zram_test_flag(zram, index, ZRAM_GWB)) {
		zram_slot_unlock(zram, index);
		return -EIO;
	}
#endif
	if (!zram_test_flag(zram, index, ZRAM_WB)) {
		/* Slot should be locked throughout the function call */
		ret = zram_read_from_zspool(zram, page, index);
		zram_slot_unlock(zram, index);
	} else {
		/*
		 * The slot should be unlocked before reading from the backing
		 * device.
		 */
		zram_slot_unlock(zram, index);

		ret = read_from_bdev(zram, page, zram_get_element(zram, index),
				     parent);
	}

	/* Should NEVER happen. Return bio error if it does. */
	if (WARN_ON(ret < 0))
		pr_err("Decompression failed! err=%d, page=%u\n", ret, index);

	return ret;
}

/*
 * Use a temporary buffer to decompress the page, as the decompressor
 * always expects a full page for the output.
 */
static int zram_bvec_read_partial(struct zram *zram, struct bio_vec *bvec,
				  u32 index, int offset)
{
	struct page *page = alloc_page(GFP_NOIO);
	int ret;

	if (!page)
		return -ENOMEM;
	ret = zram_read_page(zram, page, index, NULL);
	if (likely(!ret))
		memcpy_to_bvec(bvec, page_address(page) + offset);
	__free_page(page);
	return ret;
}

static int zram_bvec_read(struct zram *zram, struct bio_vec *bvec,
			  u32 index, int offset, struct bio *bio)
{
	if (is_partial_io(bvec))
		return zram_bvec_read_partial(zram, bvec, index, offset);
	return zram_read_page(zram, bvec->bv_page, index, bio);
}
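
/*
 * Write path sketch for the function below: same-filled pages are recorded
 * by value only (ZRAM_SAME); everything else is compressed with the primary
 * algorithm. Objects whose compressed size reaches huge_class_size are
 * stored as full PAGE_SIZE copies and flagged ZRAM_HUGE.
 */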
static int zram_write_page(struct zram *zram, struct page *page, u32 index)
{
	int ret = 0;
	unsigned long alloced_pages;
	unsigned long handle = -ENOMEM;
	unsigned int comp_len = 0;
	void *src, *dst, *mem;
	struct zcomp_strm *zstrm;
	unsigned long element = 0;
	enum zram_pageflags flags = 0;

	mem = kmap_atomic(page);
	if (page_same_filled(mem, &element)) {
		kunmap_atomic(mem);
		/* Free memory associated with this sector now. */
		flags = ZRAM_SAME;
		atomic64_inc(&zram->stats.same_pages);
		goto out;
	}
	kunmap_atomic(mem);

compress_again:
	zstrm = zcomp_stream_get(zram->comps[ZRAM_PRIMARY_COMP]);
	src = kmap_atomic(page);
	ret = zcomp_compress(zstrm, src, &comp_len);
	kunmap_atomic(src);

	if (unlikely(ret)) {
		zcomp_stream_put(zram->comps[ZRAM_PRIMARY_COMP]);
		pr_err("Compression failed! err=%d\n", ret);
		zs_free(zram->mem_pool, handle);
		return ret;
	}

	if (comp_len >= huge_class_size)
		comp_len = PAGE_SIZE;
	/*
	 * handle allocation has 2 paths:
	 * a) fast path is executed with preemption disabled (for
	 *    per-cpu streams) and has __GFP_DIRECT_RECLAIM bit clear,
	 *    since we can't sleep;
	 * b) slow path enables preemption and attempts to allocate
	 *    the page with __GFP_DIRECT_RECLAIM bit set. we have to
	 *    put per-cpu compression stream and, thus, to re-do
	 *    the compression once handle is allocated.
	 *
	 * if we have a 'non-null' handle here then we are coming
	 * from the slow path and handle has already been allocated.
	 */
	if (IS_ERR_VALUE(handle))
		handle = zs_malloc(zram->mem_pool, comp_len,
				   __GFP_KSWAPD_RECLAIM |
				   __GFP_NOWARN |
				   __GFP_HIGHMEM |
				   __GFP_MOVABLE);
	if (IS_ERR_VALUE(handle)) {
		zcomp_stream_put(zram->comps[ZRAM_PRIMARY_COMP]);
		atomic64_inc(&zram->stats.writestall);
		handle = zs_malloc(zram->mem_pool, comp_len,
				   GFP_NOIO | __GFP_HIGHMEM |
				   __GFP_MOVABLE);
		if (IS_ERR_VALUE(handle))
			return PTR_ERR((void *)handle);

		if (comp_len != PAGE_SIZE)
			goto compress_again;
		/*
		 * If the page is not compressible, you need to acquire the
		 * lock and execute the code below. The zcomp_stream_get()
		 * call is needed to disable the cpu hotplug and grab the
		 * zstrm buffer back. It is necessary that the dereferencing
		 * of the zstrm variable below occurs correctly.
		 */
		zstrm = zcomp_stream_get(zram->comps[ZRAM_PRIMARY_COMP]);
	}

	alloced_pages = zs_get_total_pages(zram->mem_pool);
	update_used_max(zram, alloced_pages);

	if (zram->limit_pages && alloced_pages > zram->limit_pages) {
		zcomp_stream_put(zram->comps[ZRAM_PRIMARY_COMP]);
		zs_free(zram->mem_pool, handle);
		return -ENOMEM;
	}

	dst = zs_map_object(zram->mem_pool, handle, ZS_MM_WO);

	src = zstrm->buffer;
	if (comp_len == PAGE_SIZE)
		src = kmap_atomic(page);
	memcpy(dst, src, comp_len);
	if (comp_len == PAGE_SIZE)
		kunmap_atomic(src);

	zcomp_stream_put(zram->comps[ZRAM_PRIMARY_COMP]);
	zs_unmap_object(zram->mem_pool, handle);
	atomic64_add(comp_len, &zram->stats.compr_data_size);
out:
	/*
	 * Free memory associated with this sector
	 * before overwriting unused sectors.
	 */
	zram_slot_lock(zram, index);
	zram_free_page(zram, index);

	if (comp_len == PAGE_SIZE) {
		zram_set_flag(zram, index, ZRAM_HUGE);
		atomic64_inc(&zram->stats.huge_pages);
		atomic64_inc(&zram->stats.huge_pages_since);
	}

	if (flags) {
		zram_set_flag(zram, index, flags);
		zram_set_element(zram, index, element);
	} else {
		zram_set_handle(zram, index, handle);
		zram_set_obj_size(zram, index, comp_len);
	}
#ifdef CONFIG_ZRAM_GROUP
	zram_group_track_obj(zram, index, page_memcg(page));
#endif
	zram_slot_unlock(zram, index);

	/* Update stats */
	atomic64_inc(&zram->stats.pages_stored);
	return ret;
}

/*
 * This is a partial IO. Read the full page before writing the changes.
 */
static int zram_bvec_write_partial(struct zram *zram, struct bio_vec *bvec,
				   u32 index, int offset, struct bio *bio)
{
	struct page *page = alloc_page(GFP_NOIO);
	int ret;

	if (!page)
		return -ENOMEM;

	ret = zram_read_page(zram, page, index, bio);
	if (!ret) {
		memcpy_from_bvec(page_address(page) + offset, bvec);
		ret = zram_write_page(zram, page, index);
	}
	__free_page(page);
	return ret;
}

static int zram_bvec_write(struct zram *zram, struct bio_vec *bvec,
			   u32 index, int offset, struct bio *bio)
{
	if (is_partial_io(bvec))
		return zram_bvec_write_partial(zram, bvec, index, offset, bio);
	return zram_write_page(zram, bvec->bv_page, index);
}

#ifdef CONFIG_ZRAM_MULTI_COMP
/*
 * This function will decompress (unless it's ZRAM_HUGE) the page and then
 * attempt to compress it using the provided compression algorithm priority
 * (which is potentially more effective).
 *
 * Corresponding ZRAM slot should be locked.
 */
static int zram_recompress(struct zram *zram, u32 index, struct page *page,
			   u32 threshold, u32 prio, u32 prio_max)
{
	struct zcomp_strm *zstrm = NULL;
	unsigned long handle_old;
	unsigned long handle_new;
	unsigned int comp_len_old;
	unsigned int comp_len_new;
	unsigned int class_index_old;
	unsigned int class_index_new;
	u32 num_recomps = 0;
	void *src, *dst;
	int ret;

	handle_old = zram_get_handle(zram, index);
	if (!handle_old)
		return -EINVAL;

	comp_len_old = zram_get_obj_size(zram, index);
	/*
	 * Do not recompress objects that are already "small enough".
	 */
	if (comp_len_old < threshold)
		return 0;

	ret = zram_read_from_zspool(zram, page, index);
	if (ret)
		return ret;

	class_index_old = zs_lookup_class_index(zram->mem_pool, comp_len_old);
	/*
	 * Iterate the secondary comp algorithms list (in order of priority)
	 * and try to recompress the page.
	 */
	for (; prio < prio_max; prio++) {
		if (!zram->comps[prio])
			continue;

		/*
		 * Skip if the object is already re-compressed with a higher
		 * priority algorithm (or same algorithm).
		 */
		if (prio <= zram_get_priority(zram, index))
			continue;

		num_recomps++;
		zstrm = zcomp_stream_get(zram->comps[prio]);
		src = kmap_atomic(page);
		ret = zcomp_compress(zstrm, src, &comp_len_new);
		kunmap_atomic(src);

		if (ret) {
			zcomp_stream_put(zram->comps[prio]);
			return ret;
		}

		class_index_new = zs_lookup_class_index(zram->mem_pool,
							comp_len_new);

		/* Continue until we make progress */
		if (class_index_new >= class_index_old ||
		    (threshold && comp_len_new >= threshold)) {
			zcomp_stream_put(zram->comps[prio]);
			continue;
		}

		/* Recompression was successful so break out */
		break;
	}

	/*
	 * We did not try to recompress, e.g. when we have only one
	 * secondary algorithm and the page is already recompressed
	 * using that algorithm
	 */
	if (!zstrm)
		return 0;

	if (class_index_new >= class_index_old) {
		/*
		 * Secondary algorithms failed to re-compress the page
		 * in a way that would save memory, mark the object as
		 * incompressible so that we will not try to compress
		 * it again.
		 *
		 * We need to make sure that all secondary algorithms have
		 * failed, so we test if the number of recompressions matches
		 * the number of active secondary algorithms.
		 */
		if (num_recomps == zram->num_active_comps - 1)
			zram_set_flag(zram, index, ZRAM_INCOMPRESSIBLE);
		return 0;
	}

	/* Successful recompression but above threshold */
	if (threshold && comp_len_new >= threshold)
		return 0;

	/*
	 * No direct reclaim (slow path) for handle allocation and no
	 * re-compression attempt (unlike in zram_write_bvec()) since
	 * we already have stored that object in zsmalloc. If we cannot
	 * alloc memory for recompressed object then we bail out and
	 * simply keep the old (existing) object in zsmalloc.
	 */
	handle_new = zs_malloc(zram->mem_pool, comp_len_new,
			       __GFP_KSWAPD_RECLAIM |
			       __GFP_NOWARN |
			       __GFP_HIGHMEM |
			       __GFP_MOVABLE);
	if (IS_ERR_VALUE(handle_new)) {
		zcomp_stream_put(zram->comps[prio]);
		return PTR_ERR((void *)handle_new);
	}

	dst = zs_map_object(zram->mem_pool, handle_new, ZS_MM_WO);
	memcpy(dst, zstrm->buffer, comp_len_new);
	zcomp_stream_put(zram->comps[prio]);

	zs_unmap_object(zram->mem_pool, handle_new);

	zram_free_page(zram, index);
	zram_set_handle(zram, index, handle_new);
	zram_set_obj_size(zram, index, comp_len_new);
	zram_set_priority(zram, index, prio);

	atomic64_add(comp_len_new, &zram->stats.compr_data_size);
	atomic64_inc(&zram->stats.pages_stored);

	return 0;
}

#define RECOMPRESS_IDLE		(1 << 0)
#define RECOMPRESS_HUGE		(1 << 1)

static ssize_t recompress_store(struct device *dev,
				struct device_attribute *attr,
				const char *buf, size_t len)
{
	u32 prio = ZRAM_SECONDARY_COMP, prio_max = ZRAM_MAX_COMPS;
	struct zram *zram = dev_to_zram(dev);
	unsigned long nr_pages = zram->disksize >> PAGE_SHIFT;
	char *args, *param, *val, *algo = NULL;
	u32 mode = 0, threshold = 0;
	unsigned long index;
	struct page *page;
	ssize_t ret;

	args = skip_spaces(buf);
	while (*args) {
		args = next_arg(args, &param, &val);

		if (!val || !*val)
			return -EINVAL;

		if (!strcmp(param, "type")) {
			if (!strcmp(val, "idle"))
				mode = RECOMPRESS_IDLE;
			if (!strcmp(val, "huge"))
				mode = RECOMPRESS_HUGE;
			if (!strcmp(val, "huge_idle"))
				mode = RECOMPRESS_IDLE | RECOMPRESS_HUGE;
			continue;
		}

		if (!strcmp(param, "threshold")) {
			/*
			 * We will re-compress only idle objects whose size is
			 * equal to or greater than the watermark.
			 */
			ret = kstrtouint(val, 10, &threshold);
			if (ret)
				return ret;
			continue;
		}

		if (!strcmp(param, "algo")) {
			algo = val;
			continue;
		}
	}

	if (threshold >= huge_class_size)
		return -EINVAL;

	down_read(&zram->init_lock);
	if (!init_done(zram)) {
		ret = -EINVAL;
		goto release_init_lock;
	}

	if (algo) {
		bool found = false;

		for (; prio < ZRAM_MAX_COMPS; prio++) {
			if (!zram->comp_algs[prio])
				continue;

			if (!strcmp(zram->comp_algs[prio], algo)) {
				prio_max = min(prio + 1, ZRAM_MAX_COMPS);
				found = true;
				break;
			}
		}

		if (!found) {
			ret = -EINVAL;
			goto release_init_lock;
		}
	}

	page = alloc_page(GFP_KERNEL);
	if (!page) {
		ret = -ENOMEM;
		goto release_init_lock;
	}

	ret = len;
	for (index = 0; index < nr_pages; index++) {
		int err = 0;

		zram_slot_lock(zram, index);

		if (!zram_allocated(zram, index))
			goto next;

		if (mode & RECOMPRESS_IDLE &&
		    !zram_test_flag(zram, index, ZRAM_IDLE))
			goto next;

		if (mode & RECOMPRESS_HUGE &&
		    !zram_test_flag(zram, index, ZRAM_HUGE))
			goto next;

		if (zram_test_flag(zram, index, ZRAM_WB) ||
		    zram_test_flag(zram, index, ZRAM_UNDER_WB) ||
		    zram_test_flag(zram, index, ZRAM_SAME) ||
		    zram_test_flag(zram, index, ZRAM_INCOMPRESSIBLE))
			goto next;

		err = zram_recompress(zram, index, page, threshold,
				      prio, prio_max);
next:
		zram_slot_unlock(zram, index);
		if (err) {
			ret = err;
			break;
		}

		cond_resched();
	}

	__free_page(page);

release_init_lock:
	up_read(&zram->init_lock);
	return ret;
}
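
/*
 * Typical recompress_store invocations (illustrative only; the algorithm
 * name must match a configured secondary compressor):
 *   echo "type=idle" > /sys/block/zramX/recompress
 *   echo "type=huge algo=zstd" > /sys/block/zramX/recompress
 * "threshold" restricts recompression to objects of at least that size and
 * must stay below huge_class_size.
 */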
#endif

static void zram_bio_discard(struct zram *zram, struct bio *bio)
{
	size_t n = bio->bi_iter.bi_size;
	u32 index = bio->bi_iter.bi_sector >> SECTORS_PER_PAGE_SHIFT;
	u32 offset = (bio->bi_iter.bi_sector & (SECTORS_PER_PAGE - 1)) <<
			SECTOR_SHIFT;

	/*
	 * zram manages data in physical block size units. Because the logical
	 * block size isn't identical to the physical block size on some
	 * architectures, we could get a discard request pointing to a specific
	 * offset within a certain physical block. Although we can handle this
	 * request by reading that physical block and decompressing and
	 * partially zeroing and re-compressing and then re-storing it, this
	 * isn't reasonable because our intent with a discard request is to
	 * save memory. So skipping this logical block is appropriate here.
	 */
	if (offset) {
		if (n <= (PAGE_SIZE - offset))
			return;

		n -= (PAGE_SIZE - offset);
		index++;
	}

	while (n >= PAGE_SIZE) {
		zram_slot_lock(zram, index);
		zram_free_page(zram, index);
		zram_slot_unlock(zram, index);
		atomic64_inc(&zram->stats.notify_free);
		index++;
		n -= PAGE_SIZE;
	}

	bio_endio(bio);
}

static void zram_bio_read(struct zram *zram, struct bio *bio)
{
	unsigned long start_time = bio_start_io_acct(bio);
	struct bvec_iter iter = bio->bi_iter;

	do {
		u32 index = iter.bi_sector >> SECTORS_PER_PAGE_SHIFT;
		u32 offset = (iter.bi_sector & (SECTORS_PER_PAGE - 1)) <<
				SECTOR_SHIFT;
		struct bio_vec bv = bio_iter_iovec(bio, iter);

		bv.bv_len = min_t(u32, bv.bv_len, PAGE_SIZE - offset);

		if (zram_bvec_read(zram, &bv, index, offset, bio) < 0) {
			atomic64_inc(&zram->stats.failed_reads);
			bio->bi_status = BLK_STS_IOERR;
			break;
		}
		flush_dcache_page(bv.bv_page);

		zram_slot_lock(zram, index);
		zram_accessed(zram, index);
		zram_slot_unlock(zram, index);

		bio_advance_iter_single(bio, &iter, bv.bv_len);
	} while (iter.bi_size);

	bio_end_io_acct(bio, start_time);
	bio_endio(bio);
}

static void zram_bio_write(struct zram *zram, struct bio *bio)
{
	unsigned long start_time = bio_start_io_acct(bio);
	struct bvec_iter iter = bio->bi_iter;

	do {
		u32 index = iter.bi_sector >> SECTORS_PER_PAGE_SHIFT;
		u32 offset = (iter.bi_sector & (SECTORS_PER_PAGE - 1)) <<
				SECTOR_SHIFT;
		struct bio_vec bv = bio_iter_iovec(bio, iter);

		bv.bv_len = min_t(u32, bv.bv_len, PAGE_SIZE - offset);

		if (zram_bvec_write(zram, &bv, index, offset, bio) < 0) {
			atomic64_inc(&zram->stats.failed_writes);
			bio->bi_status = BLK_STS_IOERR;
			break;
		}

		zram_slot_lock(zram, index);
		zram_accessed(zram, index);
		zram_slot_unlock(zram, index);

		bio_advance_iter_single(bio, &iter, bv.bv_len);
	} while (iter.bi_size);

	bio_end_io_acct(bio, start_time);
	bio_endio(bio);
}

/*
 * Handler function for all zram I/O requests.
 */
static void zram_submit_bio(struct bio *bio)
{
	struct zram *zram = bio->bi_bdev->bd_disk->private_data;

	switch (bio_op(bio)) {
	case REQ_OP_READ:
		zram_bio_read(zram, bio);
		break;
	case REQ_OP_WRITE:
		zram_bio_write(zram, bio);
		break;
	case REQ_OP_DISCARD:
	case REQ_OP_WRITE_ZEROES:
		zram_bio_discard(zram, bio);
		break;
	default:
		WARN_ON_ONCE(1);
		bio_endio(bio);
	}
}

static void zram_slot_free_notify(struct block_device *bdev,
				unsigned long index)
{
	struct zram *zram;

	zram = bdev->bd_disk->private_data;

	atomic64_inc(&zram->stats.notify_free);
	if (!zram_slot_trylock(zram, index)) {
		atomic64_inc(&zram->stats.miss_free);
		return;
	}

	zram_free_page(zram, index);
	zram_slot_unlock(zram, index);
}

static void zram_destroy_comps(struct zram *zram)
{
	u32 prio;

	for (prio = 0; prio < ZRAM_MAX_COMPS; prio++) {
		struct zcomp *comp = zram->comps[prio];

		zram->comps[prio] = NULL;
		if (!comp)
			continue;
		zcomp_destroy(comp);
		zram->num_active_comps--;
	}
}

static void zram_reset_device(struct zram *zram)
{
	down_write(&zram->init_lock);

	zram->limit_pages = 0;

	if (!init_done(zram)) {
		up_write(&zram->init_lock);
		return;
	}

	set_capacity_and_notify(zram->disk, 0);
	part_stat_set_all(zram->disk->part0, 0);

	/* I/O operations on all CPUs are done, so it is safe to free */
	zram_meta_free(zram, zram->disksize);
	zram->disksize = 0;
	zram_destroy_comps(zram);
	memset(&zram->stats, 0, sizeof(zram->stats));
	reset_bdev(zram);

	comp_algorithm_set(zram, ZRAM_PRIMARY_COMP, default_compressor);
	up_write(&zram->init_lock);
}

static ssize_t disksize_store(struct device *dev,
		struct device_attribute *attr, const char *buf, size_t len)
{
	u64 disksize;
	struct zcomp *comp;
	struct zram *zram = dev_to_zram(dev);
	int err;
	u32 prio;

	disksize = memparse(buf, NULL);
	if (!disksize)
		return -EINVAL;

	down_write(&zram->init_lock);
	if (init_done(zram)) {
		pr_info("Cannot change disksize for initialized device\n");
		err = -EBUSY;
		goto out_unlock;
	}

	disksize = PAGE_ALIGN(disksize);
	if (!zram_meta_alloc(zram, disksize)) {
		err = -ENOMEM;
		goto out_unlock;
	}

	for (prio = 0; prio < ZRAM_MAX_COMPS; prio++) {
		if (!zram->comp_algs[prio])
			continue;

		comp = zcomp_create(zram->comp_algs[prio]);
		if (IS_ERR(comp)) {
			pr_err("Cannot initialise %s compressing backend\n",
			       zram->comp_algs[prio]);
			err = PTR_ERR(comp);
			goto out_free_comps;
		}

		zram->comps[prio] = comp;
		zram->num_active_comps++;
	}
	zram->disksize = disksize;
	set_capacity_and_notify(zram->disk, zram->disksize >> SECTOR_SHIFT);
	up_write(&zram->init_lock);

	return len;

out_free_comps:
	zram_destroy_comps(zram);
	zram_meta_free(zram, disksize);
out_unlock:
	up_write(&zram->init_lock);
	return err;
}

static ssize_t reset_store(struct device *dev,
		struct device_attribute *attr, const char *buf, size_t len)
{
	int ret;
	unsigned short do_reset;
	struct zram *zram;
	struct gendisk *disk;

	ret = kstrtou16(buf, 10, &do_reset);
	if (ret)
		return ret;

        if (!do_reset)
                return -EINVAL;

        zram = dev_to_zram(dev);
        disk = zram->disk;

        mutex_lock(&disk->open_mutex);
        /* Do not reset an active device or claimed device */
        if (disk_openers(disk) || zram->claim) {
                mutex_unlock(&disk->open_mutex);
                return -EBUSY;
        }

        /* From now on, no one can open /dev/zram[0-9] */
        zram->claim = true;
        mutex_unlock(&disk->open_mutex);

        /* Make sure all pending I/O is finished */
        sync_blockdev(disk->part0);
        zram_reset_device(zram);

        mutex_lock(&disk->open_mutex);
        zram->claim = false;
        mutex_unlock(&disk->open_mutex);

        return len;
}

static int zram_open(struct gendisk *disk, blk_mode_t mode)
{
        struct zram *zram = disk->private_data;

        WARN_ON(!mutex_is_locked(&disk->open_mutex));

        /* zram was claimed for reset, so the open request fails */
        if (zram->claim)
                return -EBUSY;
        return 0;
}

static const struct block_device_operations zram_devops = {
        .open = zram_open,
        .submit_bio = zram_submit_bio,
        .swap_slot_free_notify = zram_slot_free_notify,
        .owner = THIS_MODULE
};

static DEVICE_ATTR_WO(compact);
static DEVICE_ATTR_RW(disksize);
static DEVICE_ATTR_RO(initstate);
static DEVICE_ATTR_WO(reset);
static DEVICE_ATTR_WO(mem_limit);
static DEVICE_ATTR_WO(mem_used_max);
static DEVICE_ATTR_WO(idle);
static DEVICE_ATTR_RW(max_comp_streams);
static DEVICE_ATTR_RW(comp_algorithm);
#ifdef CONFIG_ZRAM_WRITEBACK
static DEVICE_ATTR_RW(backing_dev);
static DEVICE_ATTR_WO(writeback);
static DEVICE_ATTR_RW(writeback_limit);
static DEVICE_ATTR_RW(writeback_limit_enable);
#endif
#ifdef CONFIG_ZRAM_MULTI_COMP
static DEVICE_ATTR_RW(recomp_algorithm);
static DEVICE_ATTR_WO(recompress);
#endif
#ifdef CONFIG_ZRAM_GROUP
static DEVICE_ATTR_RW(group);
#endif

static struct attribute *zram_disk_attrs[] = {
        &dev_attr_disksize.attr,
        &dev_attr_initstate.attr,
        &dev_attr_reset.attr,
        &dev_attr_compact.attr,
        &dev_attr_mem_limit.attr,
        &dev_attr_mem_used_max.attr,
        &dev_attr_idle.attr,
        &dev_attr_max_comp_streams.attr,
        &dev_attr_comp_algorithm.attr,
#ifdef CONFIG_ZRAM_WRITEBACK
        &dev_attr_backing_dev.attr,
        &dev_attr_writeback.attr,
        &dev_attr_writeback_limit.attr,
        &dev_attr_writeback_limit_enable.attr,
#endif
        &dev_attr_io_stat.attr,
        &dev_attr_mm_stat.attr,
#ifdef CONFIG_ZRAM_WRITEBACK
        &dev_attr_bd_stat.attr,
#endif
        &dev_attr_debug_stat.attr,
#ifdef CONFIG_ZRAM_MULTI_COMP
        &dev_attr_recomp_algorithm.attr,
        &dev_attr_recompress.attr,
#endif
#ifdef CONFIG_ZRAM_GROUP
        &dev_attr_group.attr,
#endif
        NULL,
};

ATTRIBUTE_GROUPS(zram_disk);

/*
 * Allocate and initialize a new zram device. The function returns
 * a '>= 0' device_id upon success, and a negative value otherwise.
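 * Callers are expected to hold zram_index_mutex, as both hot_add_show()
 * and zram_init() below do.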
 */
static int zram_add(void)
{
        struct zram *zram;
        int ret, device_id;

        zram = kzalloc(sizeof(struct zram), GFP_KERNEL);
        if (!zram)
                return -ENOMEM;

        ret = idr_alloc(&zram_index_idr, zram, 0, 0, GFP_KERNEL);
        if (ret < 0)
                goto out_free_dev;
        device_id = ret;

        init_rwsem(&zram->init_lock);
#ifdef CONFIG_ZRAM_WRITEBACK
        spin_lock_init(&zram->wb_limit_lock);
#endif

        /* gendisk structure */
        zram->disk = blk_alloc_disk(NUMA_NO_NODE);
        if (!zram->disk) {
                pr_err("Error allocating disk structure for device %d\n",
                       device_id);
                ret = -ENOMEM;
                goto out_free_idr;
        }

        zram->disk->major = zram_major;
        zram->disk->first_minor = device_id;
        zram->disk->minors = 1;
        zram->disk->flags |= GENHD_FL_NO_PART;
        zram->disk->fops = &zram_devops;
        zram->disk->private_data = zram;
        snprintf(zram->disk->disk_name, 16, "zram%d", device_id);

        /* Actual capacity is set via sysfs (/sys/block/zram<id>/disksize) */
        set_capacity(zram->disk, 0);
        /* zram devices sort of resemble non-rotational disks */
        blk_queue_flag_set(QUEUE_FLAG_NONROT, zram->disk->queue);
        blk_queue_flag_set(QUEUE_FLAG_SYNCHRONOUS, zram->disk->queue);

        /*
         * Ensure that we always get PAGE_SIZE-aligned and
         * n*PAGE_SIZE-sized I/O requests.
         */
        blk_queue_physical_block_size(zram->disk->queue, PAGE_SIZE);
        blk_queue_logical_block_size(zram->disk->queue,
                                     ZRAM_LOGICAL_BLOCK_SIZE);
        blk_queue_io_min(zram->disk->queue, PAGE_SIZE);
        blk_queue_io_opt(zram->disk->queue, PAGE_SIZE);
        zram->disk->queue->limits.discard_granularity = PAGE_SIZE;
        blk_queue_max_discard_sectors(zram->disk->queue, UINT_MAX);

        /*
         * zram_bio_discard() will clear all logical blocks if the logical
         * block size is identical to the physical block size (PAGE_SIZE).
         * But if it is different, we will skip discarding some parts of
         * logical blocks in the part of the request range which isn't
         * aligned to the physical block size. So we can't ensure that all
         * discarded logical blocks are zeroed.
         */
        if (ZRAM_LOGICAL_BLOCK_SIZE == PAGE_SIZE)
                blk_queue_max_write_zeroes_sectors(zram->disk->queue, UINT_MAX);

        blk_queue_flag_set(QUEUE_FLAG_STABLE_WRITES, zram->disk->queue);
        ret = device_add_disk(NULL, zram->disk, zram_disk_groups);
        if (ret)
                goto out_cleanup_disk;

        comp_algorithm_set(zram, ZRAM_PRIMARY_COMP, default_compressor);

        zram_debugfs_register(zram);
        pr_info("Added device: %s\n", zram->disk->disk_name);
        return device_id;

out_cleanup_disk:
        put_disk(zram->disk);
out_free_idr:
        idr_remove(&zram_index_idr, device_id);
out_free_dev:
        kfree(zram);
        return ret;
}

static int zram_remove(struct zram *zram)
{
        bool claimed;

        mutex_lock(&zram->disk->open_mutex);
        if (disk_openers(zram->disk)) {
                mutex_unlock(&zram->disk->open_mutex);
                return -EBUSY;
        }

        claimed = zram->claim;
        if (!claimed)
                zram->claim = true;
        mutex_unlock(&zram->disk->open_mutex);

        zram_debugfs_unregister(zram);

        if (claimed) {
                /*
                 * If we were claimed by reset_store(), del_gendisk() will
                 * wait until reset_store() is done, so there is nothing to do.
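                 * reset_store() performs the sync and zram_reset_device()
                 * call itself before it drops the claim.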
                 */
                ;
        } else {
                /* Make sure all pending I/O is finished */
                sync_blockdev(zram->disk->part0);
                zram_reset_device(zram);
        }

        pr_info("Removed device: %s\n", zram->disk->disk_name);

        del_gendisk(zram->disk);

        /* del_gendisk drains pending reset_store */
        WARN_ON_ONCE(claimed && zram->claim);

        /*
         * disksize_store() may be called in between zram_reset_device()
         * and del_gendisk(), so run the last reset to avoid leaking
         * anything allocated with disksize_store()
         */
        zram_reset_device(zram);

        put_disk(zram->disk);
        kfree(zram);
        return 0;
}

/* zram-control sysfs attributes */

/*
 * NOTE: hot_add attribute is not the usual read-only sysfs attribute, in the
 * sense that reading from this file does alter the state of your system -- it
 * creates a new un-initialized zram device and returns this device's
 * device_id (or an error code if it fails to create a new device).
 */
static ssize_t hot_add_show(const struct class *class,
                            const struct class_attribute *attr,
                            char *buf)
{
        int ret;

        mutex_lock(&zram_index_mutex);
        ret = zram_add();
        mutex_unlock(&zram_index_mutex);

        if (ret < 0)
                return ret;
        return scnprintf(buf, PAGE_SIZE, "%d\n", ret);
}
/* This attribute must be set to 0400, so CLASS_ATTR_RO() cannot be used */
static struct class_attribute class_attr_hot_add =
        __ATTR(hot_add, 0400, hot_add_show, NULL);

static ssize_t hot_remove_store(const struct class *class,
                                const struct class_attribute *attr,
                                const char *buf,
                                size_t count)
{
        struct zram *zram;
        int ret, dev_id;

        /* dev_id is gendisk->first_minor, which is `int' */
        ret = kstrtoint(buf, 10, &dev_id);
        if (ret)
                return ret;
        if (dev_id < 0)
                return -EINVAL;

        mutex_lock(&zram_index_mutex);

        zram = idr_find(&zram_index_idr, dev_id);
        if (zram) {
                ret = zram_remove(zram);
                if (!ret)
                        idr_remove(&zram_index_idr, dev_id);
        } else {
                ret = -ENODEV;
        }

        mutex_unlock(&zram_index_mutex);
        return ret ? ret : count;
}
static CLASS_ATTR_WO(hot_remove);

static struct attribute *zram_control_class_attrs[] = {
        &class_attr_hot_add.attr,
        &class_attr_hot_remove.attr,
        NULL,
};
ATTRIBUTE_GROUPS(zram_control_class);

static struct class zram_control_class = {
        .name           = "zram-control",
        .class_groups   = zram_control_class_groups,
};

static int zram_remove_cb(int id, void *ptr, void *data)
{
        WARN_ON_ONCE(zram_remove(ptr));
        return 0;
}

static void destroy_devices(void)
{
        class_unregister(&zram_control_class);
        idr_for_each(&zram_index_idr, &zram_remove_cb, NULL);
        zram_debugfs_destroy();
        idr_destroy(&zram_index_idr);
        unregister_blkdev(zram_major, "zram");
        cpuhp_remove_multi_state(CPUHP_ZCOMP_PREPARE);
}

static int __init zram_init(void)
{
        int ret;

        BUILD_BUG_ON(__NR_ZRAM_PAGEFLAGS > BITS_PER_LONG);

        ret = cpuhp_setup_state_multi(CPUHP_ZCOMP_PREPARE, "block/zram:prepare",
                                      zcomp_cpu_up_prepare, zcomp_cpu_dead);
        if (ret < 0)
                return ret;

        ret = class_register(&zram_control_class);
        if (ret) {
                pr_err("Unable to register zram-control class\n");
                cpuhp_remove_multi_state(CPUHP_ZCOMP_PREPARE);
                return ret;
        }

        zram_debugfs_create();
        zram_major = register_blkdev(0, "zram");
        if (zram_major <= 0) {
                pr_err("Unable to get major number\n");
                class_unregister(&zram_control_class);
                cpuhp_remove_multi_state(CPUHP_ZCOMP_PREPARE);
                return -EBUSY;
        }

        while (num_devices != 0) {
                mutex_lock(&zram_index_mutex);
                ret = zram_add();
                mutex_unlock(&zram_index_mutex);
                if (ret < 0)
                        goto out_error;
                num_devices--;
        }

        return 0;

out_error:
        destroy_devices();
        return ret;
}

static void __exit zram_exit(void)
{
        destroy_devices();
}

module_init(zram_init);
module_exit(zram_exit);

module_param(num_devices, uint, 0);
MODULE_PARM_DESC(num_devices, "Number of pre-created zram devices");

MODULE_LICENSE("Dual BSD/GPL");
MODULE_AUTHOR("Nitin Gupta <ngupta@vflare.org>");
MODULE_DESCRIPTION("Compressed RAM Block Device");
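
/*
 * Illustrative usage of the sysfs interface implemented above, as driven
 * from userspace. Paths and the swap setup follow the zram admin
 * documentation; algorithm names and sizes are examples only and depend
 * on the kernel configuration:
 *
 *      modprobe zram num_devices=1
 *      echo zstd > /sys/block/zram0/comp_algorithm
 *      echo 1G > /sys/block/zram0/disksize            # disksize_store()
 *      mkswap /dev/zram0 && swapon /dev/zram0
 *      ...
 *      swapoff /dev/zram0
 *      echo 1 > /sys/block/zram0/reset                # reset_store()
 *      cat /sys/class/zram-control/hot_add            # zram_add(), prints e.g. "1"
 *      echo 1 > /sys/class/zram-control/hot_remove    # zram_remove() for zram1
 */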