Lines matching refs: pool (drivers/md/dm-thin.c, the device-mapper thin-provisioning pool target)

42  * The block size of the device holding pool data must be
194 * A pool device ties together a metadata device and a data device. It
201 * The pool runs in various modes. Ordered in degraded order for comparisons.
232 struct pool {
234 struct dm_target *ti; /* Only set if a pool target is bound */
290 static void metadata_operation_failed(struct pool *pool, const char *op, int r);
292 static enum pool_mode get_pool_mode(struct pool *pool)
294 return pool->pf.mode;
297 static void notify_of_pool_mode_change(struct pool *pool)
307 enum pool_mode mode = get_pool_mode(pool);
310 if (!pool->pf.error_if_no_space)
316 dm_table_event(pool->ti->table);
317 DMINFO("%s: switching pool to %s%s mode",
318 dm_device_name(pool->pool_md),
323 * Target context for a pool.
327 struct pool *pool;
346 struct pool *pool;
367 static bool block_size_is_power_of_two(struct pool *pool)
369 return pool->sectors_per_block_shift >= 0;
372 static sector_t block_to_sectors(struct pool *pool, dm_block_t b)
374 return block_size_is_power_of_two(pool) ?
375 (b << pool->sectors_per_block_shift) :
376 (b * pool->sectors_per_block);
401 sector_t s = block_to_sectors(tc->pool, data_b);
402 sector_t len = block_to_sectors(tc->pool, data_e - data_b);
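Note: block_size_is_power_of_two() and block_to_sectors() above let the pool convert blocks to 512-byte sectors with a shift when the block size is a power of two, falling back to a multiply otherwise. A minimal standalone sketch of the same idea (userspace C with a simplified stand-in for struct pool; not the kernel code itself):

    #include <stdint.h>
    #include <stdio.h>

    typedef uint64_t dm_block_t;
    typedef uint64_t sector_t;

    /* Simplified stand-in for struct pool: only the fields the helpers need. */
    struct pool_geom {
        sector_t sectors_per_block;
        int sectors_per_block_shift; /* >= 0 only when sectors_per_block is a power of two */
    };

    static int block_size_is_power_of_two(const struct pool_geom *p)
    {
        return p->sectors_per_block_shift >= 0;
    }

    static sector_t block_to_sectors(const struct pool_geom *p, dm_block_t b)
    {
        return block_size_is_power_of_two(p) ?
            (b << p->sectors_per_block_shift) :
            (b * p->sectors_per_block);
    }

    int main(void)
    {
        struct pool_geom pow2 = { .sectors_per_block = 128, .sectors_per_block_shift = 7 };
        struct pool_geom odd  = { .sectors_per_block = 192, .sectors_per_block_shift = -1 };

        printf("block 10 -> %llu sectors (power-of-two block size)\n",
               (unsigned long long)block_to_sectors(&pow2, 10));
        printf("block 10 -> %llu sectors (non-power-of-two block size)\n",
               (unsigned long long)block_to_sectors(&odd, 10));
        return 0;
    }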
436 static void wake_worker(struct pool *pool)
438 queue_work(pool->wq, &pool->worker);
443 static int bio_detain(struct pool *pool, struct dm_cell_key *key, struct bio *bio,
453 cell_prealloc = dm_bio_prison_alloc_cell(pool->prison, GFP_NOIO);
455 r = dm_bio_detain(pool->prison, key, bio, cell_prealloc, cell_result);
461 dm_bio_prison_free_cell(pool->prison, cell_prealloc);
466 static void cell_release(struct pool *pool,
470 dm_cell_release(pool->prison, cell, bios);
471 dm_bio_prison_free_cell(pool->prison, cell);
474 static void cell_visit_release(struct pool *pool,
479 dm_cell_visit_release(pool->prison, fn, context, cell);
480 dm_bio_prison_free_cell(pool->prison, cell);
483 static void cell_release_no_holder(struct pool *pool,
487 dm_cell_release_no_holder(pool->prison, cell, bios);
488 dm_bio_prison_free_cell(pool->prison, cell);
491 static void cell_error_with_code(struct pool *pool,
494 dm_cell_error(pool->prison, cell, error_code);
495 dm_bio_prison_free_cell(pool->prison, cell);
498 static blk_status_t get_pool_io_error_code(struct pool *pool)
500 return pool->out_of_data_space ? BLK_STS_NOSPC : BLK_STS_IOERR;
503 static void cell_error(struct pool *pool, struct dm_bio_prison_cell *cell)
505 cell_error_with_code(pool, cell, get_pool_io_error_code(pool));
508 static void cell_success(struct pool *pool, struct dm_bio_prison_cell *cell)
510 cell_error_with_code(pool, cell, 0);
513 static void cell_requeue(struct pool *pool, struct dm_bio_prison_cell *cell)
515 cell_error_with_code(pool, cell, BLK_STS_DM_REQUEUE);
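Note: cell_error(), cell_success() and cell_requeue() are thin wrappers around cell_error_with_code(); the status a plain cell_error() uses comes from get_pool_io_error_code(), which reports "no space" when the pool has run out of data space and a generic I/O error otherwise. A hedged sketch of just that decision, with placeholder status values standing in for the blk_status_t codes:

    #include <stdbool.h>
    #include <stdio.h>

    /* Placeholder status values standing in for blk_status_t codes. */
    enum status { STS_OK = 0, STS_NOSPC = 3, STS_IOERR = 10 };

    struct pool_state {
        bool out_of_data_space;
    };

    static enum status get_pool_io_error_code(const struct pool_state *pool)
    {
        /* Out-of-data-space is reported as a "no space" status so upper layers
         * can distinguish it from a hard I/O error. */
        return pool->out_of_data_space ? STS_NOSPC : STS_IOERR;
    }

    int main(void)
    {
        struct pool_state full = { .out_of_data_space = true };
        struct pool_state ok   = { .out_of_data_space = false };

        printf("full pool errors cells with %d, healthy pool with %d\n",
               get_pool_io_error_code(&full), get_pool_io_error_code(&ok));
        return 0;
    }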
539 static void __pool_table_insert(struct pool *pool)
542 list_add(&pool->list, &dm_thin_pool_table.pools);
545 static void __pool_table_remove(struct pool *pool)
548 list_del(&pool->list);
551 static struct pool *__pool_table_lookup(struct mapped_device *md)
553 struct pool *pool = NULL, *tmp;
559 pool = tmp;
564 return pool;
567 static struct pool *__pool_table_lookup_metadata_dev(struct block_device *md_dev)
569 struct pool *pool = NULL, *tmp;
575 pool = tmp;
580 return pool;
626 struct pool *pool = tc->pool;
637 cell_requeue(pool, cell);
655 static void error_retry_list_with_code(struct pool *pool, blk_status_t error)
660 list_for_each_entry_rcu(tc, &pool->active_thins, list)
665 static void error_retry_list(struct pool *pool)
667 error_retry_list_with_code(pool, get_pool_io_error_code(pool));
672 * Much of the code depends on pool object resources (lists, workqueues, etc)
673 * but most is exclusively called from the thin target rather than the thin-pool
679 struct pool *pool = tc->pool;
682 if (block_size_is_power_of_two(pool))
683 block_nr >>= pool->sectors_per_block_shift;
685 (void) sector_div(block_nr, pool->sectors_per_block);
696 struct pool *pool = tc->pool;
700 b += pool->sectors_per_block - 1ull; /* so we round up */
702 if (block_size_is_power_of_two(pool)) {
703 b >>= pool->sectors_per_block_shift;
704 e >>= pool->sectors_per_block_shift;
706 (void) sector_div(b, pool->sectors_per_block);
707 (void) sector_div(e, pool->sectors_per_block);
720 struct pool *pool = tc->pool;
724 if (block_size_is_power_of_two(pool))
726 (block << pool->sectors_per_block_shift) |
727 (bi_sector & (pool->sectors_per_block - 1));
729 bio->bi_iter.bi_sector = (block * pool->sectors_per_block) +
730 sector_div(bi_sector, pool->sectors_per_block);
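Note: get_bio_block() maps a bio's starting sector to a thin block, and remap() rewrites that sector to the chosen data block plus the offset within the block; both prefer the shift/mask path when the block size is a power of two. A standalone sketch of the arithmetic (plain 64-bit division standing in for sector_div(), simplified types):

    #include <stdint.h>
    #include <stdio.h>

    typedef uint64_t sector_t;
    typedef uint64_t dm_block_t;

    struct geom {
        sector_t sectors_per_block;
        int sectors_per_block_shift; /* -1 if the block size is not a power of two */
    };

    static dm_block_t get_bio_block(const struct geom *g, sector_t bi_sector)
    {
        if (g->sectors_per_block_shift >= 0)
            return bi_sector >> g->sectors_per_block_shift;
        return bi_sector / g->sectors_per_block; /* the kernel uses sector_div() here */
    }

    /* Rewrite a virtual sector to a data-device sector for a given data block. */
    static sector_t remap_sector(const struct geom *g, sector_t bi_sector, dm_block_t data_block)
    {
        if (g->sectors_per_block_shift >= 0)
            return (data_block << g->sectors_per_block_shift) |
                   (bi_sector & (g->sectors_per_block - 1));
        return data_block * g->sectors_per_block + bi_sector % g->sectors_per_block;
    }

    int main(void)
    {
        struct geom g = { .sectors_per_block = 128, .sectors_per_block_shift = 7 };
        sector_t s = 1000;                        /* sector within the thin device */
        dm_block_t vblock = get_bio_block(&g, s); /* which thin block it falls in */

        printf("sector %llu -> virtual block %llu\n",
               (unsigned long long)s, (unsigned long long)vblock);
        printf("remapped to data sector %llu (data block 42)\n",
               (unsigned long long)remap_sector(&g, s, 42));
        return 0;
    }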
744 static void inc_all_io_entry(struct pool *pool, struct bio *bio)
752 h->all_io_entry = dm_deferred_entry_inc(pool->all_io_ds);
757 struct pool *pool = tc->pool;
778 spin_lock_irq(&pool->lock);
779 bio_list_add(&pool->deferred_flush_bios, bio);
780 spin_unlock_irq(&pool->lock);
832 struct pool *pool = m->tc->pool;
835 list_add_tail(&m->list, &pool->prepared_mappings);
836 wake_worker(pool);
843 struct pool *pool = m->tc->pool;
845 spin_lock_irqsave(&pool->lock, flags);
847 spin_unlock_irqrestore(&pool->lock, flags);
885 struct pool *pool = tc->pool;
890 cell_release_no_holder(pool, cell, &bios);
896 wake_worker(pool);
918 inc_all_io_entry(info->tc->pool, bio);
946 cell_visit_release(tc->pool, __inc_remap_and_issue_cell,
958 cell_error(m->tc->pool, m->cell);
960 mempool_free(m, &m->tc->pool->mapping_pool);
965 struct pool *pool = tc->pool;
990 spin_lock_irq(&pool->lock);
991 bio_list_add(&pool->deferred_flush_completions, bio);
992 spin_unlock_irq(&pool->lock);
998 struct pool *pool = tc->pool;
1003 cell_error(pool, m->cell);
1014 metadata_operation_failed(pool, "dm_thin_insert_block", r);
1015 cell_error(pool, m->cell);
1029 inc_all_io_entry(tc->pool, m->cell->holder);
1036 mempool_free(m, &pool->mapping_pool);
1047 mempool_free(m, &tc->pool->mapping_pool);
1069 metadata_operation_failed(tc->pool, "dm_thin_remove_range", r);
1075 mempool_free(m, &tc->pool->mapping_pool);
1090 struct pool *pool = tc->pool;
1098 r = dm_pool_block_is_shared(pool->pmd, b, &shared);
1111 r = dm_pool_block_is_shared(pool->pmd, e, &shared);
1132 struct pool *pool = m->tc->pool;
1134 spin_lock_irqsave(&pool->lock, flags);
1135 list_add_tail(&m->list, &pool->prepared_discards_pt2);
1136 spin_unlock_irqrestore(&pool->lock, flags);
1137 wake_worker(pool);
1154 struct pool *pool = tc->pool;
1165 metadata_operation_failed(pool, "dm_thin_remove_range", r);
1168 mempool_free(m, &pool->mapping_pool);
1176 r = dm_pool_inc_data_range(pool->pmd, m->data_block, data_end);
1178 metadata_operation_failed(pool, "dm_pool_inc_data_range", r);
1181 mempool_free(m, &pool->mapping_pool);
1203 struct pool *pool = tc->pool;
1209 r = dm_pool_dec_data_range(pool->pmd, m->data_block,
1212 metadata_operation_failed(pool, "dm_pool_dec_data_range", r);
1218 mempool_free(m, &pool->mapping_pool);
1221 static void process_prepared(struct pool *pool, struct list_head *head,
1228 spin_lock_irq(&pool->lock);
1230 spin_unlock_irq(&pool->lock);
1239 static int io_overlaps_block(struct pool *pool, struct bio *bio)
1242 (pool->sectors_per_block << SECTOR_SHIFT);
1245 static int io_overwrites_block(struct pool *pool, struct bio *bio)
1248 io_overlaps_block(pool, bio);
1258 static int ensure_next_mapping(struct pool *pool)
1260 if (pool->next_mapping)
1263 pool->next_mapping = mempool_alloc(&pool->mapping_pool, GFP_ATOMIC);
1265 return pool->next_mapping ? 0 : -ENOMEM;
1268 static struct dm_thin_new_mapping *get_next_mapping(struct pool *pool)
1270 struct dm_thin_new_mapping *m = pool->next_mapping;
1272 BUG_ON(!pool->next_mapping);
1278 pool->next_mapping = NULL;
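Note: ensure_next_mapping() reserves one mapping object from the mempool at a point where failure can still be handled, so that a later get_next_mapping() can consume it without being allowed to fail. A hedged sketch of that reserve-then-consume pattern, using malloc() in place of the mempool and a hypothetical mapping struct:

    #include <assert.h>
    #include <stdlib.h>
    #include <string.h>

    /* Hypothetical stand-in for struct dm_thin_new_mapping. */
    struct mapping {
        unsigned long virt_block;
        unsigned long data_block;
    };

    struct worker_ctx {
        struct mapping *next_mapping; /* reserved object, or NULL */
    };

    /* May fail; called where failure can still be handled gracefully. */
    static int ensure_next_mapping(struct worker_ctx *w)
    {
        if (w->next_mapping)
            return 0;
        w->next_mapping = malloc(sizeof(*w->next_mapping));
        return w->next_mapping ? 0 : -1;
    }

    /* Must not fail; callers guarantee ensure_next_mapping() succeeded first. */
    static struct mapping *get_next_mapping(struct worker_ctx *w)
    {
        struct mapping *m = w->next_mapping;

        assert(m != NULL);
        memset(m, 0, sizeof(*m));
        w->next_mapping = NULL;
        return m;
    }

    int main(void)
    {
        struct worker_ctx w = { 0 };

        if (ensure_next_mapping(&w))
            return 1;               /* back off and retry later */
        free(get_next_mapping(&w)); /* guaranteed to succeed at this point */
        return 0;
    }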
1292 dm_kcopyd_zero(tc->pool->copier, 1, &to, 0, copy_complete, m);
1299 struct pool *pool = tc->pool;
1305 inc_all_io_entry(pool, bio);
1318 struct pool *pool = tc->pool;
1319 struct dm_thin_new_mapping *m = get_next_mapping(pool);
1334 if (!dm_deferred_set_add_work(pool->shared_read_ds, &m->list))
1338 * IO to pool_dev remaps to the pool target's data_dev.
1343 if (io_overwrites_block(pool, bio))
1349 from.sector = data_origin * pool->sectors_per_block;
1353 to.sector = data_dest * pool->sectors_per_block;
1356 dm_kcopyd_copy(pool->copier, &from, 1, &to,
1362 if (len < pool->sectors_per_block && pool->pf.zero_new_blocks) {
1365 data_dest * pool->sectors_per_block + len,
1366 (data_dest + 1) * pool->sectors_per_block);
1379 tc->pool->sectors_per_block);
1386 struct pool *pool = tc->pool;
1387 struct dm_thin_new_mapping *m = get_next_mapping(pool);
1401 if (pool->pf.zero_new_blocks) {
1402 if (io_overwrites_block(pool, bio))
1405 ll_zero(tc, m, data_block * pool->sectors_per_block,
1406 (data_block + 1) * pool->sectors_per_block);
1415 struct pool *pool = tc->pool;
1416 sector_t virt_block_begin = virt_block * pool->sectors_per_block;
1417 sector_t virt_block_end = (virt_block + 1) * pool->sectors_per_block;
1422 pool->sectors_per_block);
1433 static void set_pool_mode(struct pool *pool, enum pool_mode new_mode);
1435 static void requeue_bios(struct pool *pool);
1442 static bool is_read_only(struct pool *pool)
1444 return is_read_only_pool_mode(get_pool_mode(pool));
1447 static void check_for_metadata_space(struct pool *pool)
1453 r = dm_pool_get_free_metadata_block_count(pool->pmd, &nr_free);
1459 if (ooms_reason && !is_read_only(pool)) {
1461 set_pool_mode(pool, PM_OUT_OF_METADATA_SPACE);
1465 static void check_for_data_space(struct pool *pool)
1470 if (get_pool_mode(pool) != PM_OUT_OF_DATA_SPACE)
1473 r = dm_pool_get_free_block_count(pool->pmd, &nr_free);
1478 set_pool_mode(pool, PM_WRITE);
1479 requeue_bios(pool);
1487 static int commit(struct pool *pool)
1491 if (get_pool_mode(pool) >= PM_OUT_OF_METADATA_SPACE)
1494 r = dm_pool_commit_metadata(pool->pmd);
1496 metadata_operation_failed(pool, "dm_pool_commit_metadata", r);
1498 check_for_metadata_space(pool);
1499 check_for_data_space(pool);
1505 static void check_low_water_mark(struct pool *pool, dm_block_t free_blocks)
1507 if (free_blocks <= pool->low_water_blocks && !pool->low_water_triggered) {
1509 dm_device_name(pool->pool_md));
1510 spin_lock_irq(&pool->lock);
1511 pool->low_water_triggered = true;
1512 spin_unlock_irq(&pool->lock);
1513 dm_table_event(pool->ti->table);
1521 struct pool *pool = tc->pool;
1523 if (WARN_ON(get_pool_mode(pool) != PM_WRITE))
1526 r = dm_pool_get_free_block_count(pool->pmd, &free_blocks);
1528 metadata_operation_failed(pool, "dm_pool_get_free_block_count", r);
1532 check_low_water_mark(pool, free_blocks);
1539 r = commit(pool);
1543 r = dm_pool_get_free_block_count(pool->pmd, &free_blocks);
1545 metadata_operation_failed(pool, "dm_pool_get_free_block_count", r);
1550 set_pool_mode(pool, PM_OUT_OF_DATA_SPACE);
1555 r = dm_pool_alloc_data_block(pool->pmd, result);
1558 set_pool_mode(pool, PM_OUT_OF_DATA_SPACE);
1560 metadata_operation_failed(pool, "dm_pool_alloc_data_block", r);
1564 r = dm_pool_get_free_metadata_block_count(pool->pmd, &free_blocks);
1566 metadata_operation_failed(pool, "dm_pool_get_free_metadata_block_count", r);
1572 r = commit(pool);
1594 static blk_status_t should_error_unserviceable_bio(struct pool *pool)
1596 enum pool_mode m = get_pool_mode(pool);
1601 DMERR_LIMIT("bio unserviceable, yet pool is in PM_WRITE mode");
1605 return pool->pf.error_if_no_space ? BLK_STS_NOSPC : 0;
1613 DMERR_LIMIT("bio unserviceable, yet pool has an unknown mode");
1618 static void handle_unserviceable_bio(struct pool *pool, struct bio *bio)
1620 blk_status_t error = should_error_unserviceable_bio(pool);
1629 static void retry_bios_on_resume(struct pool *pool, struct dm_bio_prison_cell *cell)
1635 error = should_error_unserviceable_bio(pool);
1637 cell_error_with_code(pool, cell, error);
1642 cell_release(pool, cell, &bios);
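Note: should_error_unserviceable_bio() decides whether a bio that cannot be serviced is failed immediately or parked for retry on resume: an out-of-data-space pool only fails it when error_if_no_space is set, while read-only and failed pools always fail it. A simplified standalone sketch of that decision (placeholder status values, not the real blk_status_t constants):

    #include <stdbool.h>
    #include <stdio.h>

    enum pool_mode { PM_WRITE, PM_OUT_OF_DATA_SPACE, PM_OUT_OF_METADATA_SPACE,
                     PM_READ_ONLY, PM_FAIL };
    /* Placeholder status codes standing in for blk_status_t values. */
    enum status { STS_OK = 0, STS_NOSPC = 3, STS_IOERR = 10 };

    /* STS_OK means "queue the bio and retry it when the pool is resumed";
     * anything else means fail the bio now with that status. */
    static enum status should_error_unserviceable_bio(enum pool_mode mode,
                                                      bool error_if_no_space)
    {
        switch (mode) {
        case PM_WRITE:
            return STS_IOERR; /* unexpected: a writable pool should have serviced it */
        case PM_OUT_OF_DATA_SPACE:
            return error_if_no_space ? STS_NOSPC : STS_OK;
        default: /* PM_OUT_OF_METADATA_SPACE, PM_READ_ONLY, PM_FAIL */
            return STS_IOERR;
        }
    }

    int main(void)
    {
        printf("out of space, error_if_no_space=0 -> %d (retry later)\n",
               should_error_unserviceable_bio(PM_OUT_OF_DATA_SPACE, false));
        printf("out of space, error_if_no_space=1 -> %d\n",
               should_error_unserviceable_bio(PM_OUT_OF_DATA_SPACE, true));
        return 0;
    }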
1651 struct pool *pool = tc->pool;
1652 struct dm_thin_new_mapping *m = get_next_mapping(pool);
1664 if (!dm_deferred_set_add_work(pool->all_io_ds, &m->list))
1665 pool->process_prepared_discard(m);
1671 struct pool *pool = tc->pool;
1698 r = ensure_next_mapping(pool);
1708 if (bio_detain(tc->pool, &data_key, NULL, &data_cell)) {
1718 m = get_next_mapping(pool);
1736 if (!dm_deferred_set_add_work(pool->all_io_ds, &m->list))
1737 pool->process_prepared_discard(m);
1789 if (bio_detain(tc->pool, &virt_key, bio, &virt_cell)) {
1800 tc->pool->process_discard_cell(tc, virt_cell);
1810 struct pool *pool = tc->pool;
1820 retry_bios_on_resume(pool, cell);
1826 cell_error(pool, cell);
1844 h->shared_read_entry = dm_deferred_entry_inc(info->tc->pool->shared_read_ds);
1845 inc_all_io_entry(info->tc->pool, bio);
1862 cell_visit_release(tc->pool, __remap_and_issue_shared_cell,
1878 struct pool *pool = tc->pool;
1886 if (bio_detain(pool, &key, bio, &data_cell)) {
1897 h->shared_read_entry = dm_deferred_entry_inc(pool->shared_read_ds);
1898 inc_all_io_entry(pool, bio);
1911 struct pool *pool = tc->pool;
1917 inc_all_io_entry(pool, bio);
1944 retry_bios_on_resume(pool, cell);
1950 cell_error(pool, cell);
1958 struct pool *pool = tc->pool;
1964 cell_requeue(pool, cell);
1974 inc_all_io_entry(pool, bio);
1982 inc_all_io_entry(pool, bio);
2012 struct pool *pool = tc->pool;
2022 if (bio_detain(pool, &key, bio, &cell))
2040 handle_unserviceable_bio(tc->pool, bio);
2044 inc_all_io_entry(tc->pool, bio);
2055 handle_unserviceable_bio(tc->pool, bio);
2060 inc_all_io_entry(tc->pool, bio);
2101 cell_success(tc->pool, cell);
2106 cell_error(tc->pool, cell);
2113 static int need_commit_due_to_time(struct pool *pool)
2115 return !time_in_range(jiffies, pool->last_commit_jiffies,
2116 pool->last_commit_jiffies + COMMIT_PERIOD);
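Note: need_commit_due_to_time() forces a periodic metadata commit once last_commit_jiffies is older than COMMIT_PERIOD. A standalone sketch of the same test using wall-clock seconds in place of jiffies and time_in_range():

    #include <stdbool.h>
    #include <stdio.h>
    #include <time.h>

    #define COMMIT_PERIOD_SEC 1 /* stand-in for the kernel's COMMIT_PERIOD (in jiffies) */

    struct commit_state {
        time_t last_commit;
    };

    /* True when "now" falls outside [last_commit, last_commit + COMMIT_PERIOD_SEC],
     * i.e. the last commit has aged past the commit period. */
    static bool need_commit_due_to_time(const struct commit_state *c, time_t now)
    {
        return now < c->last_commit || now > c->last_commit + COMMIT_PERIOD_SEC;
    }

    int main(void)
    {
        struct commit_state c = { .last_commit = time(NULL) - 5 };

        printf("commit needed: %s\n",
               need_commit_due_to_time(&c, time(NULL)) ? "yes" : "no");
        return 0;
    }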
2185 struct pool *pool = tc->pool;
2220 if (ensure_next_mapping(pool)) {
2229 pool->process_discard(tc, bio);
2231 pool->process_bio(tc, bio);
2234 throttle_work_update(&pool->throttle);
2235 dm_pool_issue_prefetches(pool->pmd);
2259 static unsigned int sort_cells(struct pool *pool, struct list_head *cells)
2268 pool->cell_sort_array[count++] = cell;
2272 sort(pool->cell_sort_array, count, sizeof(cell), cmp_cells, NULL);
2279 struct pool *pool = tc->pool;
2294 count = sort_cells(tc->pool, &cells);
2297 cell = pool->cell_sort_array[i];
2305 if (ensure_next_mapping(pool)) {
2307 list_add(&pool->cell_sort_array[j]->user_list, &cells);
2316 pool->process_discard_cell(tc, cell);
2318 pool->process_cell(tc, cell);
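Note: sort_cells() copies the deferred prison cells into pool->cell_sort_array and sorts them by the holder bio's sector so they are processed in roughly ascending disk order. A hedged sketch of the comparator and sort step using qsort() on a simplified cell type:

    #include <stdint.h>
    #include <stdio.h>
    #include <stdlib.h>

    typedef uint64_t sector_t;

    /* Simplified stand-in for a bio prison cell: only the holder bio's
     * sector matters for ordering. */
    struct cell {
        sector_t holder_sector;
    };

    /* qsort() hands us pointers to array elements, and the array holds
     * cell pointers, so each argument is a pointer to a cell pointer. */
    static int cmp_cells(const void *lhs, const void *rhs)
    {
        const struct cell *l = *(const struct cell * const *)lhs;
        const struct cell *r = *(const struct cell * const *)rhs;

        if (l->holder_sector < r->holder_sector)
            return -1;
        if (l->holder_sector > r->holder_sector)
            return 1;
        return 0;
    }

    int main(void)
    {
        struct cell a = { 900 }, b = { 100 }, c = { 500 };
        struct cell *sort_array[] = { &a, &b, &c };

        qsort(sort_array, 3, sizeof(sort_array[0]), cmp_cells);
        for (int i = 0; i < 3; i++)
            printf("cell at sector %llu\n",
                   (unsigned long long)sort_array[i]->holder_sector);
        return 0;
    }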
2332 static struct thin_c *get_first_thin(struct pool *pool)
2337 if (!list_empty(&pool->active_thins)) {
2338 tc = list_entry_rcu(pool->active_thins.next, struct thin_c, list);
2346 static struct thin_c *get_next_thin(struct pool *pool, struct thin_c *tc)
2351 list_for_each_entry_continue_rcu(tc, &pool->active_thins, list) {
2363 static void process_deferred_bios(struct pool *pool)
2369 tc = get_first_thin(pool);
2373 tc = get_next_thin(pool, tc);
2383 spin_lock_irq(&pool->lock);
2384 bio_list_merge(&bios, &pool->deferred_flush_bios);
2385 bio_list_init(&pool->deferred_flush_bios);
2387 bio_list_merge(&bio_completions, &pool->deferred_flush_completions);
2388 bio_list_init(&pool->deferred_flush_completions);
2389 spin_unlock_irq(&pool->lock);
2392 !(dm_pool_changed_this_transaction(pool->pmd) && need_commit_due_to_time(pool)))
2395 if (commit(pool)) {
2402 pool->last_commit_jiffies = jiffies;
2421 struct pool *pool = container_of(ws, struct pool, worker);
2423 throttle_work_start(&pool->throttle);
2424 dm_pool_issue_prefetches(pool->pmd);
2425 throttle_work_update(&pool->throttle);
2426 process_prepared(pool, &pool->prepared_mappings, &pool->process_prepared_mapping);
2427 throttle_work_update(&pool->throttle);
2428 process_prepared(pool, &pool->prepared_discards, &pool->process_prepared_discard);
2429 throttle_work_update(&pool->throttle);
2430 process_prepared(pool, &pool->prepared_discards_pt2, &pool->process_prepared_discard_pt2);
2431 throttle_work_update(&pool->throttle);
2432 process_deferred_bios(pool);
2433 throttle_work_complete(&pool->throttle);
2442 struct pool *pool = container_of(to_delayed_work(ws), struct pool, waker);
2444 wake_worker(pool);
2445 queue_delayed_work(pool->wq, &pool->waker, COMMIT_PERIOD);
2450 * timeout either the pool will have been resized (and thus back in
2455 struct pool *pool = container_of(to_delayed_work(ws), struct pool,
2458 if (get_pool_mode(pool) == PM_OUT_OF_DATA_SPACE && !pool->pf.error_if_no_space) {
2459 pool->pf.error_if_no_space = true;
2460 notify_of_pool_mode_change(pool);
2461 error_retry_list_with_code(pool, BLK_STS_NOSPC);
2482 static void pool_work_wait(struct pool_work *pw, struct pool *pool,
2487 queue_work(pool->wq, &pw->worker);
2525 pool_work_wait(&w.pw, tc->pool, fn);
2530 static void set_discard_callbacks(struct pool *pool)
2532 struct pool_c *pt = pool->ti->private;
2535 pool->process_discard_cell = process_discard_cell_passdown;
2536 pool->process_prepared_discard = process_prepared_discard_passdown_pt1;
2537 pool->process_prepared_discard_pt2 = process_prepared_discard_passdown_pt2;
2539 pool->process_discard_cell = process_discard_cell_no_passdown;
2540 pool->process_prepared_discard = process_prepared_discard_no_passdown;
2544 static void set_pool_mode(struct pool *pool, enum pool_mode new_mode)
2546 struct pool_c *pt = pool->ti->private;
2547 bool needs_check = dm_pool_metadata_needs_check(pool->pmd);
2548 enum pool_mode old_mode = get_pool_mode(pool);
2552 * Never allow the pool to transition to PM_WRITE mode if user
2556 DMERR("%s: unable to switch pool to write mode until repaired.",
2557 dm_device_name(pool->pool_md));
2566 * pool move out of the old mode.
2573 dm_pool_metadata_read_only(pool->pmd);
2574 pool->process_bio = process_bio_fail;
2575 pool->process_discard = process_bio_fail;
2576 pool->process_cell = process_cell_fail;
2577 pool->process_discard_cell = process_cell_fail;
2578 pool->process_prepared_mapping = process_prepared_mapping_fail;
2579 pool->process_prepared_discard = process_prepared_discard_fail;
2581 error_retry_list(pool);
2586 dm_pool_metadata_read_only(pool->pmd);
2587 pool->process_bio = process_bio_read_only;
2588 pool->process_discard = process_bio_success;
2589 pool->process_cell = process_cell_read_only;
2590 pool->process_discard_cell = process_cell_success;
2591 pool->process_prepared_mapping = process_prepared_mapping_fail;
2592 pool->process_prepared_discard = process_prepared_discard_success;
2594 error_retry_list(pool);
2600 * would trigger userland to extend the pool before we
2606 pool->out_of_data_space = true;
2607 pool->process_bio = process_bio_read_only;
2608 pool->process_discard = process_discard_bio;
2609 pool->process_cell = process_cell_read_only;
2610 pool->process_prepared_mapping = process_prepared_mapping;
2611 set_discard_callbacks(pool);
2613 if (!pool->pf.error_if_no_space && no_space_timeout)
2614 queue_delayed_work(pool->wq, &pool->no_space_timeout, no_space_timeout);
2619 cancel_delayed_work_sync(&pool->no_space_timeout);
2620 pool->out_of_data_space = false;
2621 pool->pf.error_if_no_space = pt->requested_pf.error_if_no_space;
2622 dm_pool_metadata_read_write(pool->pmd);
2623 pool->process_bio = process_bio;
2624 pool->process_discard = process_discard_bio;
2625 pool->process_cell = process_cell;
2626 pool->process_prepared_mapping = process_prepared_mapping;
2627 set_discard_callbacks(pool);
2631 pool->pf.mode = new_mode;
2633 * The pool mode may have changed, sync it so bind_control_target()
2639 notify_of_pool_mode_change(pool);
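Note: set_pool_mode() behaves like a small state machine: each mode installs a matching set of process_* handlers and switches the metadata between read-only and read-write before the new mode is published and userspace is notified. A much-reduced sketch of that function-pointer dispatch pattern (hypothetical handlers, only one pointer shown instead of the full process_* family):

    #include <stdio.h>

    enum pool_mode { PM_WRITE, PM_OUT_OF_DATA_SPACE, PM_READ_ONLY, PM_FAIL };

    struct bio; /* opaque in this sketch */
    typedef void (*process_bio_fn)(struct bio *bio);

    static void process_bio_normal(struct bio *bio)    { (void)bio; puts("map bio"); }
    static void process_bio_read_only(struct bio *bio) { (void)bio; puts("reads only, writes fail"); }
    static void process_bio_fail(struct bio *bio)      { (void)bio; puts("fail bio"); }

    struct pool_sketch {
        enum pool_mode mode;
        process_bio_fn process_bio;
    };

    /* Swap the handler along with the mode, as set_pool_mode() does with its
     * larger set of process_* pointers. */
    static void set_mode(struct pool_sketch *pool, enum pool_mode new_mode)
    {
        switch (new_mode) {
        case PM_WRITE:
            pool->process_bio = process_bio_normal;
            break;
        case PM_OUT_OF_DATA_SPACE:
        case PM_READ_ONLY:
            pool->process_bio = process_bio_read_only;
            break;
        case PM_FAIL:
            pool->process_bio = process_bio_fail;
            break;
        }
        pool->mode = new_mode;
    }

    int main(void)
    {
        struct pool_sketch pool;

        set_mode(&pool, PM_WRITE);
        pool.process_bio(NULL);
        set_mode(&pool, PM_READ_ONLY);
        pool.process_bio(NULL);
        return 0;
    }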
2642 static void abort_transaction(struct pool *pool)
2644 const char *dev_name = dm_device_name(pool->pool_md);
2647 if (dm_pool_abort_metadata(pool->pmd)) {
2649 set_pool_mode(pool, PM_FAIL);
2652 if (dm_pool_metadata_set_needs_check(pool->pmd)) {
2654 set_pool_mode(pool, PM_FAIL);
2658 static void metadata_operation_failed(struct pool *pool, const char *op, int r)
2661 dm_device_name(pool->pool_md), op, r);
2663 abort_transaction(pool);
2664 set_pool_mode(pool, PM_READ_ONLY);
2678 struct pool *pool = tc->pool;
2684 wake_worker(pool);
2689 struct pool *pool = tc->pool;
2691 throttle_lock(&pool->throttle);
2693 throttle_unlock(&pool->throttle);
2698 struct pool *pool = tc->pool;
2700 throttle_lock(&pool->throttle);
2704 throttle_unlock(&pool->throttle);
2706 wake_worker(pool);
2741 if (get_pool_mode(tc->pool) == PM_FAIL) {
2756 if (bio_detain(tc->pool, &key, bio, &virt_cell))
2786 if (bio_detain(tc->pool, &key, bio, &data_cell)) {
2791 inc_all_io_entry(tc->pool, bio);
2807 * pool is switched to fail-io mode.
2815 static void requeue_bios(struct pool *pool)
2820 list_for_each_entry_rcu(tc, &pool->active_thins, list) {
2831 * Binding of control targets to a pool object
2845 struct pool *pool = pt->pool;
2856 else if (data_limits->max_discard_sectors < pool->sectors_per_block)
2865 static int bind_control_target(struct pool *pool, struct dm_target *ti)
2870 * We want to make sure that a pool in PM_FAIL mode is never upgraded.
2872 enum pool_mode old_mode = get_pool_mode(pool);
2876 * Don't change the pool's mode until set_pool_mode() below.
2877 * Otherwise the pool's process_* function pointers may
2878 * not match the desired pool mode.
2882 pool->ti = ti;
2883 pool->pf = pt->adjusted_pf;
2884 pool->low_water_blocks = pt->low_water_blocks;
2886 set_pool_mode(pool, new_mode);
2891 static void unbind_control_target(struct pool *pool, struct dm_target *ti)
2893 if (pool->ti == ti)
2894 pool->ti = NULL;
2902 /* Initialize pool features. */
2912 static void __pool_destroy(struct pool *pool)
2914 __pool_table_remove(pool);
2916 vfree(pool->cell_sort_array);
2917 if (dm_pool_metadata_close(pool->pmd) < 0)
2920 dm_bio_prison_destroy(pool->prison);
2921 dm_kcopyd_client_destroy(pool->copier);
2923 cancel_delayed_work_sync(&pool->waker);
2924 cancel_delayed_work_sync(&pool->no_space_timeout);
2925 if (pool->wq)
2926 destroy_workqueue(pool->wq);
2928 if (pool->next_mapping)
2929 mempool_free(pool->next_mapping, &pool->mapping_pool);
2930 mempool_exit(&pool->mapping_pool);
2931 dm_deferred_set_destroy(pool->shared_read_ds);
2932 dm_deferred_set_destroy(pool->all_io_ds);
2933 kfree(pool);
2938 static struct pool *pool_create(struct mapped_device *pool_md,
2946 struct pool *pool;
2953 return (struct pool *)pmd;
2956 pool = kzalloc(sizeof(*pool), GFP_KERNEL);
2957 if (!pool) {
2958 *error = "Error allocating memory for pool";
2963 pool->pmd = pmd;
2964 pool->sectors_per_block = block_size;
2966 pool->sectors_per_block_shift = -1;
2968 pool->sectors_per_block_shift = __ffs(block_size);
2969 pool->low_water_blocks = 0;
2970 pool_features_init(&pool->pf);
2971 pool->prison = dm_bio_prison_create();
2972 if (!pool->prison) {
2973 *error = "Error creating pool's bio prison";
2978 pool->copier = dm_kcopyd_client_create(&dm_kcopyd_throttle);
2979 if (IS_ERR(pool->copier)) {
2980 r = PTR_ERR(pool->copier);
2981 *error = "Error creating pool's kcopyd client";
2990 pool->wq = alloc_ordered_workqueue("dm-" DM_MSG_PREFIX, WQ_MEM_RECLAIM);
2991 if (!pool->wq) {
2992 *error = "Error creating pool's workqueue";
2997 throttle_init(&pool->throttle);
2998 INIT_WORK(&pool->worker, do_worker);
2999 INIT_DELAYED_WORK(&pool->waker, do_waker);
3000 INIT_DELAYED_WORK(&pool->no_space_timeout, do_no_space_timeout);
3001 spin_lock_init(&pool->lock);
3002 bio_list_init(&pool->deferred_flush_bios);
3003 bio_list_init(&pool->deferred_flush_completions);
3004 INIT_LIST_HEAD(&pool->prepared_mappings);
3005 INIT_LIST_HEAD(&pool->prepared_discards);
3006 INIT_LIST_HEAD(&pool->prepared_discards_pt2);
3007 INIT_LIST_HEAD(&pool->active_thins);
3008 pool->low_water_triggered = false;
3009 pool->suspended = true;
3010 pool->out_of_data_space = false;
3012 pool->shared_read_ds = dm_deferred_set_create();
3013 if (!pool->shared_read_ds) {
3014 *error = "Error creating pool's shared read deferred set";
3019 pool->all_io_ds = dm_deferred_set_create();
3020 if (!pool->all_io_ds) {
3021 *error = "Error creating pool's all io deferred set";
3026 pool->next_mapping = NULL;
3027 r = mempool_init_slab_pool(&pool->mapping_pool, MAPPING_POOL_SIZE,
3030 *error = "Error creating pool's mapping mempool";
3035 pool->cell_sort_array =
3037 sizeof(*pool->cell_sort_array)));
3038 if (!pool->cell_sort_array) {
3044 pool->ref_count = 1;
3045 pool->last_commit_jiffies = jiffies;
3046 pool->pool_md = pool_md;
3047 pool->md_dev = metadata_dev;
3048 pool->data_dev = data_dev;
3049 __pool_table_insert(pool);
3051 return pool;
3054 mempool_exit(&pool->mapping_pool);
3056 dm_deferred_set_destroy(pool->all_io_ds);
3058 dm_deferred_set_destroy(pool->shared_read_ds);
3060 destroy_workqueue(pool->wq);
3062 dm_kcopyd_client_destroy(pool->copier);
3064 dm_bio_prison_destroy(pool->prison);
3066 kfree(pool);
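Note: pool_create() records the block size and precomputes sectors_per_block_shift, leaving it at -1 unless the block size is a power of two; the fast paths above key off that field. A tiny standalone sketch of that setup, with __builtin_ctzll() standing in for the kernel's __ffs():

    #include <stdint.h>
    #include <stdio.h>

    typedef uint64_t sector_t;

    struct geom {
        sector_t sectors_per_block;
        int sectors_per_block_shift;
    };

    static void geom_init(struct geom *g, sector_t block_size)
    {
        g->sectors_per_block = block_size;
        if (block_size & (block_size - 1))
            g->sectors_per_block_shift = -1;                      /* not a power of two */
        else
            g->sectors_per_block_shift = __builtin_ctzll(block_size); /* like __ffs() */
    }

    int main(void)
    {
        struct geom a, b;

        geom_init(&a, 128); /* 128 sectors = 64KiB blocks: shift = 7 */
        geom_init(&b, 192); /* 192 sectors = 96KiB blocks: no shift */
        printf("128 -> shift %d, 192 -> shift %d\n",
               a.sectors_per_block_shift, b.sectors_per_block_shift);
        return 0;
    }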
3074 static void __pool_inc(struct pool *pool)
3077 pool->ref_count++;
3080 static void __pool_dec(struct pool *pool)
3083 BUG_ON(!pool->ref_count);
3084 if (!--pool->ref_count)
3085 __pool_destroy(pool);
3088 static struct pool *__pool_find(struct mapped_device *pool_md,
3094 struct pool *pool = __pool_table_lookup_metadata_dev(metadata_dev);
3096 if (pool) {
3097 if (pool->pool_md != pool_md) {
3098 *error = "metadata device already in use by a pool";
3101 if (pool->data_dev != data_dev) {
3102 *error = "data device already in use by a pool";
3105 __pool_inc(pool);
3108 pool = __pool_table_lookup(pool_md);
3109 if (pool) {
3110 if (pool->md_dev != metadata_dev || pool->data_dev != data_dev) {
3111 *error = "different pool cannot replace a pool";
3114 __pool_inc(pool);
3117 pool = pool_create(pool_md, metadata_dev, data_dev, block_size, read_only, error);
3122 return pool;
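Note: __pool_find() is find-or-create with reference counting: reuse an existing pool when the metadata and data devices match, reject a mismatched reuse attempt with an error string, and only call pool_create() when no pool exists. A hedged standalone sketch of the same pattern over a plain linked list (hypothetical entry type, no locking shown):

    #include <stdio.h>
    #include <stdlib.h>
    #include <string.h>

    /* Hypothetical registry entry standing in for struct pool. */
    struct entry {
        char md_dev[32];   /* metadata device */
        char data_dev[32]; /* data device */
        int ref_count;
        struct entry *next;
    };

    static struct entry *table;

    static struct entry *lookup_by_md(const char *md_dev)
    {
        for (struct entry *e = table; e; e = e->next)
            if (!strcmp(e->md_dev, md_dev))
                return e;
        return NULL;
    }

    /* Reuse a matching entry (taking a reference), reject a mismatched reuse
     * attempt, otherwise create a fresh entry with ref_count 1. */
    static struct entry *find_or_create(const char *md_dev, const char *data_dev,
                                        const char **error)
    {
        struct entry *e = lookup_by_md(md_dev);

        if (e) {
            if (strcmp(e->data_dev, data_dev)) {
                *error = "metadata device already in use with a different data device";
                return NULL;
            }
            e->ref_count++;
            return e;
        }

        e = calloc(1, sizeof(*e));
        if (!e) {
            *error = "out of memory";
            return NULL;
        }
        snprintf(e->md_dev, sizeof(e->md_dev), "%s", md_dev);
        snprintf(e->data_dev, sizeof(e->data_dev), "%s", data_dev);
        e->ref_count = 1;
        e->next = table;
        table = e;
        return e;
    }

    int main(void)
    {
        const char *err = NULL;
        struct entry *a = find_or_create("metaA", "dataA", &err);
        struct entry *b = find_or_create("metaA", "dataA", &err); /* same pool, ref 2 */
        struct entry *c = find_or_create("metaA", "dataB", &err); /* rejected */

        printf("a==b: %d, refs: %d, c: %p (%s)\n",
               a == b, a ? a->ref_count : 0, (void *)c, err ? err : "ok");
        return 0;
    }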
3136 unbind_control_target(pt->pool, ti);
3137 __pool_dec(pt->pool);
3153 {0, 4, "Invalid number of pool feature arguments"},
3186 ti->error = "Unrecognised pool feature requested";
3197 struct pool *pool = context;
3200 dm_device_name(pool->pool_md));
3202 dm_table_event(pool->ti->table);
3218 struct pool *pool = context;
3220 return blkdev_issue_flush(pool->data_dev);
3275 * thin-pool <metadata dev> <data dev>
3284 * read_only: Don't allow any changes to be made to the pool metadata.
3291 struct pool *pool;
3322 * Set default pool features.
3367 pool = __pool_find(dm_table_get_md(ti->table), metadata_dev->bdev, data_dev->bdev,
3369 if (IS_ERR(pool)) {
3370 r = PTR_ERR(pool);
3377 * initial load. This would require a pool reload to trigger thin
3380 if (!pool_created && pf.discard_enabled != pool->pf.discard_enabled) {
3386 pt->pool = pool;
3396 * Only need to enable discards if the pool should pass
3404 * stacking of discard limits (this keeps the pool and
3412 r = dm_pool_register_metadata_threshold(pt->pool->pmd,
3415 pool);
3421 dm_pool_register_pre_commit_callback(pool->pmd,
3422 metadata_pre_commit_callback, pool);
3429 __pool_dec(pool);
3445 struct pool *pool = pt->pool;
3450 spin_lock_irq(&pool->lock);
3452 spin_unlock_irq(&pool->lock);
3461 struct pool *pool = pt->pool;
3467 (void) sector_div(data_size, pool->sectors_per_block);
3469 r = dm_pool_get_data_dev_size(pool->pmd, &sb_data_size);
3472 dm_device_name(pool->pool_md));
3477 DMERR("%s: pool target (%llu blocks) too small: expected %llu",
3478 dm_device_name(pool->pool_md),
3483 if (dm_pool_metadata_needs_check(pool->pmd)) {
3485 dm_device_name(pool->pool_md));
3491 dm_device_name(pool->pool_md),
3493 r = dm_pool_resize_data_dev(pool->pmd, data_size);
3495 metadata_operation_failed(pool, "dm_pool_resize_data_dev", r);
3509 struct pool *pool = pt->pool;
3514 metadata_dev_size = get_metadata_dev_size_in_blocks(pool->md_dev);
3516 r = dm_pool_get_metadata_dev_size(pool->pmd, &sb_metadata_dev_size);
3519 dm_device_name(pool->pool_md));
3525 dm_device_name(pool->pool_md),
3530 if (dm_pool_metadata_needs_check(pool->pmd)) {
3532 dm_device_name(pool->pool_md));
3536 warn_if_metadata_device_too_big(pool->md_dev);
3538 dm_device_name(pool->pool_md),
3541 if (get_pool_mode(pool) == PM_OUT_OF_METADATA_SPACE)
3542 set_pool_mode(pool, PM_WRITE);
3544 r = dm_pool_resize_metadata_dev(pool->pmd, metadata_dev_size);
3546 metadata_operation_failed(pool, "dm_pool_resize_metadata_dev", r);
3572 struct pool *pool = pt->pool;
3575 * Take control of the pool object.
3577 r = bind_control_target(pool, ti);
3590 (void) commit(pool);
3593 * When a thin-pool is PM_FAIL, it cannot be rebuilt if
3597 if (r && get_pool_mode(pool) == PM_FAIL)
3603 static void pool_suspend_active_thins(struct pool *pool)
3608 tc = get_first_thin(pool);
3611 tc = get_next_thin(pool, tc);
3615 static void pool_resume_active_thins(struct pool *pool)
3620 tc = get_first_thin(pool);
3623 tc = get_next_thin(pool, tc);
3630 struct pool *pool = pt->pool;
3636 requeue_bios(pool);
3637 pool_resume_active_thins(pool);
3639 spin_lock_irq(&pool->lock);
3640 pool->low_water_triggered = false;
3641 pool->suspended = false;
3642 spin_unlock_irq(&pool->lock);
3644 do_waker(&pool->waker.work);
3650 struct pool *pool = pt->pool;
3652 spin_lock_irq(&pool->lock);
3653 pool->suspended = true;
3654 spin_unlock_irq(&pool->lock);
3656 pool_suspend_active_thins(pool);
3662 struct pool *pool = pt->pool;
3664 pool_resume_active_thins(pool);
3666 spin_lock_irq(&pool->lock);
3667 pool->suspended = false;
3668 spin_unlock_irq(&pool->lock);
3674 struct pool *pool = pt->pool;
3676 cancel_delayed_work_sync(&pool->waker);
3677 cancel_delayed_work_sync(&pool->no_space_timeout);
3678 flush_workqueue(pool->wq);
3679 (void) commit(pool);
3705 static int process_create_thin_mesg(unsigned int argc, char **argv, struct pool *pool)
3718 r = dm_pool_create_thin(pool->pmd, dev_id);
3728 static int process_create_snap_mesg(unsigned int argc, char **argv, struct pool *pool)
3746 r = dm_pool_create_snap(pool->pmd, dev_id, origin_dev_id);
3756 static int process_delete_mesg(unsigned int argc, char **argv, struct pool *pool)
3769 r = dm_pool_delete_thin_device(pool->pmd, dev_id);
3776 static int process_set_transaction_id_mesg(unsigned int argc, char **argv, struct pool *pool)
3795 r = dm_pool_set_metadata_transaction_id(pool->pmd, old_id, new_id);
3805 static int process_reserve_metadata_snap_mesg(unsigned int argc, char **argv, struct pool *pool)
3813 (void) commit(pool);
3815 r = dm_pool_reserve_metadata_snap(pool->pmd);
3822 static int process_release_metadata_snap_mesg(unsigned int argc, char **argv, struct pool *pool)
3830 r = dm_pool_release_metadata_snap(pool->pmd);
3851 struct pool *pool = pt->pool;
3853 if (get_pool_mode(pool) >= PM_OUT_OF_METADATA_SPACE) {
3854 DMERR("%s: unable to service pool target messages in READ_ONLY or FAIL mode",
3855 dm_device_name(pool->pool_md));
3860 r = process_create_thin_mesg(argc, argv, pool);
3863 r = process_create_snap_mesg(argc, argv, pool);
3866 r = process_delete_mesg(argc, argv, pool);
3869 r = process_set_transaction_id_mesg(argc, argv, pool);
3872 r = process_reserve_metadata_snap_mesg(argc, argv, pool);
3875 r = process_release_metadata_snap_mesg(argc, argv, pool);
3878 DMWARN("Unrecognised thin pool target message received: %s", argv[0]);
3881 (void) commit(pool);
3914 * <pool mode> <discard config> <no space config> <needs_check>
3931 struct pool *pool = pt->pool;
3935 if (get_pool_mode(pool) == PM_FAIL) {
3942 (void) commit(pool);
3944 r = dm_pool_get_metadata_transaction_id(pool->pmd, &transaction_id);
3947 dm_device_name(pool->pool_md), r);
3951 r = dm_pool_get_free_metadata_block_count(pool->pmd, &nr_free_blocks_metadata);
3954 dm_device_name(pool->pool_md), r);
3958 r = dm_pool_get_metadata_dev_size(pool->pmd, &nr_blocks_metadata);
3961 dm_device_name(pool->pool_md), r);
3965 r = dm_pool_get_free_block_count(pool->pmd, &nr_free_blocks_data);
3968 dm_device_name(pool->pool_md), r);
3972 r = dm_pool_get_data_dev_size(pool->pmd, &nr_blocks_data);
3975 dm_device_name(pool->pool_md), r);
3979 r = dm_pool_get_metadata_snap(pool->pmd, &held_root);
3982 dm_device_name(pool->pool_md), r);
3998 mode = get_pool_mode(pool);
4006 if (!pool->pf.discard_enabled)
4008 else if (pool->pf.discard_passdown)
4013 if (pool->pf.error_if_no_space)
4018 if (dm_pool_metadata_needs_check(pool->pmd))
4031 (unsigned long)pool->sectors_per_block,
4057 struct pool *pool = pt->pool;
4061 * If max_sectors is smaller than pool->sectors_per_block adjust it
4062 * to the highest possible power-of-2 factor of pool->sectors_per_block.
4063 * This is especially beneficial when the pool's data device is a RAID
4064 * device that has a full stripe width that matches pool->sectors_per_block
4069 if (limits->max_sectors < pool->sectors_per_block) {
4070 while (!is_factor(pool->sectors_per_block, limits->max_sectors)) {
4079 * pool's blocksize (io_opt is a factor) do not override them.
4081 if (io_opt_sectors < pool->sectors_per_block ||
4082 !is_factor(io_opt_sectors, pool->sectors_per_block)) {
4083 if (is_factor(pool->sectors_per_block, limits->max_sectors))
4086 blk_limits_io_min(limits, pool->sectors_per_block << SECTOR_SHIFT);
4087 blk_limits_io_opt(limits, pool->sectors_per_block << SECTOR_SHIFT);
4092 * They get transferred to the live pool in bind_control_target()
4101 * The pool uses the same discard limits as the underlying data
4107 * block layer will stack them if pool's data device has support.
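Note: pool_io_hints() shrinks limits->max_sectors until it evenly divides pool->sectors_per_block (preferring power-of-two values) and then bases io_min/io_opt on the block size, so I/O boundaries line up with thin blocks. A standalone sketch of the max_sectors adjustment loop (is_factor() and rounddown_pow_of_two() reimplemented here purely for illustration):

    #include <stdint.h>
    #include <stdio.h>

    typedef uint64_t sector_t;

    static int is_factor(sector_t block_size, sector_t n)
    {
        return n && !(block_size % n);
    }

    static sector_t rounddown_pow_of_two(sector_t n)
    {
        while (n & (n - 1))
            n &= n - 1; /* clear low bits until only the highest remains */
        return n;
    }

    /* Shrink max_sectors until it evenly divides the pool block size,
     * stepping down through power-of-two values. */
    static sector_t limit_max_sectors(sector_t sectors_per_block, sector_t max_sectors)
    {
        if (max_sectors < sectors_per_block) {
            while (!is_factor(sectors_per_block, max_sectors)) {
                if ((max_sectors & (max_sectors - 1)) == 0)
                    max_sectors--;
                max_sectors = rounddown_pow_of_two(max_sectors);
            }
        }
        return max_sectors;
    }

    int main(void)
    {
        /* 1280-sector blocks with a 1000-sector device limit end up at 256. */
        printf("max_sectors -> %llu\n",
               (unsigned long long)limit_max_sectors(1280, 1000));
        return 0;
    }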
4114 .name = "thin-pool",
4153 spin_lock_irq(&tc->pool->lock);
4155 spin_unlock_irq(&tc->pool->lock);
4163 __pool_dec(tc->pool);
4178 * pool_dev: the path to the pool (eg, /dev/mapper/my_pool)
4180 * origin_dev: a device external to the pool that should act as the origin
4182 * If the pool device has discards disabled, they get disabled for the thin
4230 ti->error = "Error opening pool device";
4243 ti->error = "Couldn't get pool mapped device";
4248 tc->pool = __pool_table_lookup(pool_md);
4249 if (!tc->pool) {
4250 ti->error = "Couldn't find pool object";
4254 __pool_inc(tc->pool);
4256 if (get_pool_mode(tc->pool) == PM_FAIL) {
4262 r = dm_pool_open_thin_device(tc->pool->pmd, tc->dev_id, &tc->td);
4268 r = dm_set_target_max_io_len(ti, tc->pool->sectors_per_block);
4278 /* In case the pool supports discards, pass them on. */
4279 if (tc->pool->pf.discard_enabled) {
4287 spin_lock_irq(&tc->pool->lock);
4288 if (tc->pool->suspended) {
4289 spin_unlock_irq(&tc->pool->lock);
4291 ti->error = "Unable to activate thin device while pool is suspended";
4297 list_add_tail_rcu(&tc->list, &tc->pool->active_thins);
4298 spin_unlock_irq(&tc->pool->lock);
4314 __pool_dec(tc->pool);
4344 struct pool *pool = h->tc->pool;
4350 spin_lock_irqsave(&pool->lock, flags);
4355 spin_unlock_irqrestore(&pool->lock, flags);
4362 spin_lock_irqsave(&pool->lock, flags);
4364 list_add_tail(&m->list, &pool->prepared_discards);
4365 spin_unlock_irqrestore(&pool->lock, flags);
4366 wake_worker(pool);
4417 if (get_pool_mode(tc->pool) == PM_FAIL) {
4439 DMEMIT("%llu ", mapped * tc->pool->sectors_per_block);
4442 tc->pool->sectors_per_block) - 1);
4472 struct pool *pool = tc->pool;
4476 * we follow a more convoluted path through to the pool's target.
4478 if (!pool->ti)
4481 blocks = pool->ti->len;
4482 (void) sector_div(blocks, pool->sectors_per_block);
4484 return fn(ti, tc->pool_dev, 0, pool->sectors_per_block * blocks, data);
4492 struct pool *pool = tc->pool;
4494 if (pool->pf.discard_enabled) {
4495 limits->discard_granularity = pool->sectors_per_block << SECTOR_SHIFT;
4496 limits->max_discard_sectors = pool->sectors_per_block * BIO_PRISON_MAX_RANGE;