Lines matching refs:pool — dm thin-pool target (drivers/md/dm-thin.c)

41  * The block size of the device holding pool data must be
191 * A pool device ties together a metadata device and a data device. It
198 * The pool runs in various modes. Ordered in degraded order for comparisons.
229 struct pool {
231 struct dm_target *ti; /* Only set if a pool target is bound */
289 static void metadata_operation_failed(struct pool *pool, const char *op, int r);
291 static enum pool_mode get_pool_mode(struct pool *pool)
293 return pool->pf.mode;
296 static void notify_of_pool_mode_change(struct pool *pool)
306 enum pool_mode mode = get_pool_mode(pool);
309 if (!pool->pf.error_if_no_space)
315 dm_table_event(pool->ti->table);
316 DMINFO("%s: switching pool to %s%s mode",
317 dm_device_name(pool->pool_md),
322 * Target context for a pool.
326 struct pool *pool;
345 struct pool *pool;
366 static bool block_size_is_power_of_two(struct pool *pool)
368 return pool->sectors_per_block_shift >= 0;
371 static sector_t block_to_sectors(struct pool *pool, dm_block_t b)
373 return block_size_is_power_of_two(pool) ?
374 (b << pool->sectors_per_block_shift) :
375 (b * pool->sectors_per_block);
400 sector_t s = block_to_sectors(tc->pool, data_b);
401 sector_t len = block_to_sectors(tc->pool, data_e - data_b);
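
The two helpers above rely on sectors_per_block_shift being >= 0 only when the block size is a power of two, so the common case is a shift rather than a 64-bit multiply. A minimal userspace sketch of that strategy follows; struct pool_geom, the stand-in typedefs and the example geometries are invented for illustration, not kernel API.

    /*
     * Sketch of the block -> sector conversion strategy shown in the
     * listing. Stand-in types; compiles and runs in userspace.
     */
    #include <assert.h>
    #include <stdint.h>
    #include <stdio.h>

    typedef uint64_t sector_t;
    typedef uint64_t dm_block_t;

    struct pool_geom {
        sector_t sectors_per_block;
        int sectors_per_block_shift;    /* -1 when not a power of two */
    };

    static int block_size_is_power_of_two(const struct pool_geom *p)
    {
        return p->sectors_per_block_shift >= 0;
    }

    static sector_t block_to_sectors(const struct pool_geom *p, dm_block_t b)
    {
        return block_size_is_power_of_two(p) ?
            (b << p->sectors_per_block_shift) :   /* fast path: shift */
            (b * p->sectors_per_block);           /* general case: multiply */
    }

    int main(void)
    {
        struct pool_geom pow2 = { .sectors_per_block = 128, .sectors_per_block_shift = 7 };
        struct pool_geom odd  = { .sectors_per_block = 96,  .sectors_per_block_shift = -1 };

        assert(block_to_sectors(&pow2, 10) == 1280);
        assert(block_to_sectors(&odd, 10) == 960);
        printf("block 10 -> %llu / %llu sectors\n",
               (unsigned long long)block_to_sectors(&pow2, 10),
               (unsigned long long)block_to_sectors(&odd, 10));
        return 0;
    }
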
436 static void wake_worker(struct pool *pool)
438 queue_work(pool->wq, &pool->worker);
443 static int bio_detain(struct pool *pool, struct dm_cell_key *key, struct bio *bio,
453 cell_prealloc = dm_bio_prison_alloc_cell(pool->prison, GFP_NOIO);
455 r = dm_bio_detain(pool->prison, key, bio, cell_prealloc, cell_result);
461 dm_bio_prison_free_cell(pool->prison, cell_prealloc);
466 static void cell_release(struct pool *pool,
470 dm_cell_release(pool->prison, cell, bios);
471 dm_bio_prison_free_cell(pool->prison, cell);
474 static void cell_visit_release(struct pool *pool,
479 dm_cell_visit_release(pool->prison, fn, context, cell);
480 dm_bio_prison_free_cell(pool->prison, cell);
483 static void cell_release_no_holder(struct pool *pool,
487 dm_cell_release_no_holder(pool->prison, cell, bios);
488 dm_bio_prison_free_cell(pool->prison, cell);
491 static void cell_error_with_code(struct pool *pool,
494 dm_cell_error(pool->prison, cell, error_code);
495 dm_bio_prison_free_cell(pool->prison, cell);
498 static blk_status_t get_pool_io_error_code(struct pool *pool)
500 return pool->out_of_data_space ? BLK_STS_NOSPC : BLK_STS_IOERR;
503 static void cell_error(struct pool *pool, struct dm_bio_prison_cell *cell)
505 cell_error_with_code(pool, cell, get_pool_io_error_code(pool));
508 static void cell_success(struct pool *pool, struct dm_bio_prison_cell *cell)
510 cell_error_with_code(pool, cell, 0);
513 static void cell_requeue(struct pool *pool, struct dm_bio_prison_cell *cell)
515 cell_error_with_code(pool, cell, BLK_STS_DM_REQUEUE);
539 static void __pool_table_insert(struct pool *pool)
542 list_add(&pool->list, &dm_thin_pool_table.pools);
545 static void __pool_table_remove(struct pool *pool)
548 list_del(&pool->list);
551 static struct pool *__pool_table_lookup(struct mapped_device *md)
553 struct pool *pool = NULL, *tmp;
559 pool = tmp;
564 return pool;
567 static struct pool *__pool_table_lookup_metadata_dev(struct block_device *md_dev)
569 struct pool *pool = NULL, *tmp;
575 pool = tmp;
580 return pool;
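
__pool_table_lookup() and __pool_table_lookup_metadata_dev() are plain linear scans of the global pool table, keyed either on the mapped device or on the metadata device. A compilable sketch of the same idea, with a singly linked list and opaque pointers standing in for the kernel list and device structures:

    /* Userspace sketch of the pool-table lookups; not the kernel code. */
    #include <stddef.h>
    #include <stdio.h>

    struct pool {
        const void *pool_md;    /* stand-in for the mapped_device */
        const void *md_dev;     /* stand-in for the metadata block_device */
        struct pool *next;
    };

    static struct pool *pool_table;     /* global table of live pools */

    static struct pool *pool_table_lookup(const void *md)
    {
        for (struct pool *p = pool_table; p; p = p->next)
            if (p->pool_md == md)
                return p;
        return NULL;
    }

    static struct pool *pool_table_lookup_metadata_dev(const void *md_dev)
    {
        for (struct pool *p = pool_table; p; p = p->next)
            if (p->md_dev == md_dev)
                return p;
        return NULL;
    }

    int main(void)
    {
        int md, dev;
        struct pool p = { .pool_md = &md, .md_dev = &dev, .next = NULL };

        pool_table = &p;
        printf("by md: %p, by metadata dev: %p\n",
               (void *)pool_table_lookup(&md),
               (void *)pool_table_lookup_metadata_dev(&dev));
        return 0;
    }
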
626 struct pool *pool = tc->pool;
637 cell_requeue(pool, cell);
655 static void error_retry_list_with_code(struct pool *pool, blk_status_t error)
660 list_for_each_entry_rcu(tc, &pool->active_thins, list)
665 static void error_retry_list(struct pool *pool)
667 error_retry_list_with_code(pool, get_pool_io_error_code(pool));
672 * Much of the code depends on pool object resources (lists, workqueues, etc)
673 * but most is exclusively called from the thin target rather than the thin-pool
679 struct pool *pool = tc->pool;
682 if (block_size_is_power_of_two(pool))
683 block_nr >>= pool->sectors_per_block_shift;
685 (void) sector_div(block_nr, pool->sectors_per_block);
696 struct pool *pool = tc->pool;
700 b += pool->sectors_per_block - 1ull; /* so we round up */
702 if (block_size_is_power_of_two(pool)) {
703 b >>= pool->sectors_per_block_shift;
704 e >>= pool->sectors_per_block_shift;
706 (void) sector_div(b, pool->sectors_per_block);
707 (void) sector_div(e, pool->sectors_per_block);
720 struct pool *pool = tc->pool;
724 if (block_size_is_power_of_two(pool))
726 (block << pool->sectors_per_block_shift) |
727 (bi_sector & (pool->sectors_per_block - 1));
729 bio->bi_iter.bi_sector = (block * pool->sectors_per_block) +
730 sector_div(bi_sector, pool->sectors_per_block);
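
remap() substitutes the allocated data block while keeping the bio's offset within the block, again with a shift-and-mask fast path when the block size is a power of two. A self-contained sketch of that arithmetic; remap_sector() and its parameters are stand-ins, and % stands in for sector_div():

    /* Sketch of the remap arithmetic; stand-in types, no kernel headers. */
    #include <stdint.h>
    #include <stdio.h>

    typedef uint64_t sector_t;
    typedef uint64_t dm_block_t;

    static sector_t remap_sector(sector_t bi_sector, dm_block_t block,
                                 sector_t sectors_per_block, int shift)
    {
        if (shift >= 0)     /* power-of-two block size: shift and mask */
            return (block << shift) | (bi_sector & (sectors_per_block - 1));
        /* otherwise multiply, and keep the in-block offset via the remainder */
        return block * sectors_per_block + (bi_sector % sectors_per_block);
    }

    int main(void)
    {
        sector_t spb = 128;         /* 64KiB blocks in 512-byte sectors */
        sector_t bio_sector = 1000; /* sector within the thin device */
        dm_block_t data_block = 42; /* block allocated in the data device */

        printf("remapped to data sector %llu\n",
               (unsigned long long)remap_sector(bio_sector, data_block, spb, 7));
        return 0;
    }
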
744 static void inc_all_io_entry(struct pool *pool, struct bio *bio)
752 h->all_io_entry = dm_deferred_entry_inc(pool->all_io_ds);
757 struct pool *pool = tc->pool;
778 spin_lock_irq(&pool->lock);
779 bio_list_add(&pool->deferred_flush_bios, bio);
780 spin_unlock_irq(&pool->lock);
832 struct pool *pool = m->tc->pool;
835 list_add_tail(&m->list, &pool->prepared_mappings);
836 wake_worker(pool);
843 struct pool *pool = m->tc->pool;
845 spin_lock_irqsave(&pool->lock, flags);
847 spin_unlock_irqrestore(&pool->lock, flags);
885 struct pool *pool = tc->pool;
890 cell_release_no_holder(pool, cell, &tc->deferred_bio_list);
895 wake_worker(pool);
916 inc_all_io_entry(info->tc->pool, bio);
944 cell_visit_release(tc->pool, __inc_remap_and_issue_cell,
956 cell_error(m->tc->pool, m->cell);
958 mempool_free(m, &m->tc->pool->mapping_pool);
963 struct pool *pool = tc->pool;
988 spin_lock_irq(&pool->lock);
989 bio_list_add(&pool->deferred_flush_completions, bio);
990 spin_unlock_irq(&pool->lock);
996 struct pool *pool = tc->pool;
1001 cell_error(pool, m->cell);
1012 metadata_operation_failed(pool, "dm_thin_insert_block", r);
1013 cell_error(pool, m->cell);
1027 inc_all_io_entry(tc->pool, m->cell->holder);
1034 mempool_free(m, &pool->mapping_pool);
1044 mempool_free(m, &tc->pool->mapping_pool);
1066 metadata_operation_failed(tc->pool, "dm_thin_remove_range", r);
1072 mempool_free(m, &tc->pool->mapping_pool);
1087 struct pool *pool = tc->pool;
1095 r = dm_pool_block_is_shared(pool->pmd, b, &shared);
1108 r = dm_pool_block_is_shared(pool->pmd, e, &shared);
1129 struct pool *pool = m->tc->pool;
1131 spin_lock_irqsave(&pool->lock, flags);
1132 list_add_tail(&m->list, &pool->prepared_discards_pt2);
1133 spin_unlock_irqrestore(&pool->lock, flags);
1134 wake_worker(pool);
1151 struct pool *pool = tc->pool;
1162 metadata_operation_failed(pool, "dm_thin_remove_range", r);
1165 mempool_free(m, &pool->mapping_pool);
1173 r = dm_pool_inc_data_range(pool->pmd, m->data_block, data_end);
1175 metadata_operation_failed(pool, "dm_pool_inc_data_range", r);
1178 mempool_free(m, &pool->mapping_pool);
1185 dm_device_name(tc->pool->pool_md));
1208 struct pool *pool = tc->pool;
1214 r = dm_pool_dec_data_range(pool->pmd, m->data_block,
1217 metadata_operation_failed(pool, "dm_pool_dec_data_range", r);
1223 mempool_free(m, &pool->mapping_pool);
1226 static void process_prepared(struct pool *pool, struct list_head *head,
1233 spin_lock_irq(&pool->lock);
1235 spin_unlock_irq(&pool->lock);
1244 static int io_overlaps_block(struct pool *pool, struct bio *bio)
1247 (pool->sectors_per_block << SECTOR_SHIFT);
1250 static int io_overwrites_block(struct pool *pool, struct bio *bio)
1253 io_overlaps_block(pool, bio);
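
io_overwrites_block() is the test that lets a write covering exactly one pool block be remapped straight to the new data block instead of going through a copy or zero first. A small sketch of the check, assuming SECTOR_SHIFT is 9 and using a fake bio type in place of struct bio:

    /* Sketch of the whole-block-overwrite test; fake bio, stand-in fields. */
    #include <stdbool.h>
    #include <stdint.h>
    #include <stdio.h>

    #define SECTOR_SHIFT 9

    struct fake_bio {
        uint32_t bi_size;   /* bytes */
        bool is_write;
    };

    static bool io_overlaps_block(uint64_t sectors_per_block, const struct fake_bio *bio)
    {
        return bio->bi_size == (sectors_per_block << SECTOR_SHIFT);
    }

    static bool io_overwrites_block(uint64_t sectors_per_block, const struct fake_bio *bio)
    {
        return bio->is_write && io_overlaps_block(sectors_per_block, bio);
    }

    int main(void)
    {
        struct fake_bio full    = { .bi_size = 128 << SECTOR_SHIFT, .is_write = true };
        struct fake_bio partial = { .bi_size = 4096, .is_write = true };

        printf("full: %d, partial: %d\n",
               io_overwrites_block(128, &full), io_overwrites_block(128, &partial));
        return 0;
    }
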
1263 static int ensure_next_mapping(struct pool *pool)
1265 if (pool->next_mapping)
1268 pool->next_mapping = mempool_alloc(&pool->mapping_pool, GFP_ATOMIC);
1270 return pool->next_mapping ? 0 : -ENOMEM;
1273 static struct dm_thin_new_mapping *get_next_mapping(struct pool *pool)
1275 struct dm_thin_new_mapping *m = pool->next_mapping;
1277 BUG_ON(!pool->next_mapping);
1283 pool->next_mapping = NULL;
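
ensure_next_mapping()/get_next_mapping() implement a preallocate-then-consume pattern: the allocation that can fail happens before the point of no return, so the code that later takes the mapping cannot fail for lack of memory. A userspace sketch with malloc standing in for the mempool:

    /* Sketch of the "ensure, then get" preallocation pattern. */
    #include <assert.h>
    #include <stdio.h>
    #include <stdlib.h>

    struct mapping { int dummy; };

    static struct mapping *next_mapping;    /* the spare, owned by the "pool" */

    static int ensure_next_mapping(void)
    {
        if (next_mapping)
            return 0;
        next_mapping = calloc(1, sizeof(*next_mapping));
        return next_mapping ? 0 : -1;       /* -ENOMEM in the kernel */
    }

    static struct mapping *get_next_mapping(void)
    {
        struct mapping *m = next_mapping;

        assert(next_mapping);               /* mirrors BUG_ON(!pool->next_mapping) */
        next_mapping = NULL;
        return m;
    }

    int main(void)
    {
        if (ensure_next_mapping())          /* may fail; caller can back off */
            return 1;
        struct mapping *m = get_next_mapping();   /* cannot fail at this point */
        printf("got mapping %p\n", (void *)m);
        free(m);
        return 0;
    }
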
1297 dm_kcopyd_zero(tc->pool->copier, 1, &to, 0, copy_complete, m);
1304 struct pool *pool = tc->pool;
1310 inc_all_io_entry(pool, bio);
1323 struct pool *pool = tc->pool;
1324 struct dm_thin_new_mapping *m = get_next_mapping(pool);
1339 if (!dm_deferred_set_add_work(pool->shared_read_ds, &m->list))
1343 * IO to pool_dev remaps to the pool target's data_dev.
1348 if (io_overwrites_block(pool, bio))
1354 from.sector = data_origin * pool->sectors_per_block;
1358 to.sector = data_dest * pool->sectors_per_block;
1361 dm_kcopyd_copy(pool->copier, &from, 1, &to,
1367 if (len < pool->sectors_per_block && pool->pf.zero_new_blocks) {
1370 data_dest * pool->sectors_per_block + len,
1371 (data_dest + 1) * pool->sectors_per_block);
1384 tc->pool->sectors_per_block);
1391 struct pool *pool = tc->pool;
1392 struct dm_thin_new_mapping *m = get_next_mapping(pool);
1406 if (pool->pf.zero_new_blocks) {
1407 if (io_overwrites_block(pool, bio))
1410 ll_zero(tc, m, data_block * pool->sectors_per_block,
1411 (data_block + 1) * pool->sectors_per_block);
1420 struct pool *pool = tc->pool;
1421 sector_t virt_block_begin = virt_block * pool->sectors_per_block;
1422 sector_t virt_block_end = (virt_block + 1) * pool->sectors_per_block;
1427 pool->sectors_per_block);
1438 static void set_pool_mode(struct pool *pool, enum pool_mode new_mode);
1440 static void requeue_bios(struct pool *pool);
1447 static bool is_read_only(struct pool *pool)
1449 return is_read_only_pool_mode(get_pool_mode(pool));
1452 static void check_for_metadata_space(struct pool *pool)
1458 r = dm_pool_get_free_metadata_block_count(pool->pmd, &nr_free);
1464 if (ooms_reason && !is_read_only(pool)) {
1466 set_pool_mode(pool, PM_OUT_OF_METADATA_SPACE);
1470 static void check_for_data_space(struct pool *pool)
1475 if (get_pool_mode(pool) != PM_OUT_OF_DATA_SPACE)
1478 r = dm_pool_get_free_block_count(pool->pmd, &nr_free);
1483 set_pool_mode(pool, PM_WRITE);
1484 requeue_bios(pool);
1492 static int commit(struct pool *pool)
1496 if (get_pool_mode(pool) >= PM_OUT_OF_METADATA_SPACE)
1499 r = dm_pool_commit_metadata(pool->pmd);
1501 metadata_operation_failed(pool, "dm_pool_commit_metadata", r);
1503 check_for_metadata_space(pool);
1504 check_for_data_space(pool);
1510 static void check_low_water_mark(struct pool *pool, dm_block_t free_blocks)
1512 if (free_blocks <= pool->low_water_blocks && !pool->low_water_triggered) {
1514 dm_device_name(pool->pool_md));
1515 spin_lock_irq(&pool->lock);
1516 pool->low_water_triggered = true;
1517 spin_unlock_irq(&pool->lock);
1518 dm_table_event(pool->ti->table);
1526 struct pool *pool = tc->pool;
1528 if (WARN_ON(get_pool_mode(pool) != PM_WRITE))
1531 r = dm_pool_get_free_block_count(pool->pmd, &free_blocks);
1533 metadata_operation_failed(pool, "dm_pool_get_free_block_count", r);
1537 check_low_water_mark(pool, free_blocks);
1544 r = commit(pool);
1548 r = dm_pool_get_free_block_count(pool->pmd, &free_blocks);
1550 metadata_operation_failed(pool, "dm_pool_get_free_block_count", r);
1555 set_pool_mode(pool, PM_OUT_OF_DATA_SPACE);
1560 r = dm_pool_alloc_data_block(pool->pmd, result);
1563 set_pool_mode(pool, PM_OUT_OF_DATA_SPACE);
1565 metadata_operation_failed(pool, "dm_pool_alloc_data_block", r);
1569 r = dm_pool_get_free_metadata_block_count(pool->pmd, &free_blocks);
1571 metadata_operation_failed(pool, "dm_pool_get_free_metadata_block_count", r);
1577 r = commit(pool);
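
The fragments above show the shape of the allocation path: when the free count reaches zero the pool commits first, because blocks freed in the open transaction only become allocatable after a commit, and only if space is still exhausted does it drop into out-of-data-space mode. A toy sketch of that ordering, with two counters standing in for pool->pmd; the real control flow has more steps than the listing shows:

    /* Sketch of "commit before declaring out of space"; counters are stand-ins. */
    #include <stdio.h>

    static int free_blocks;                  /* visible free count */
    static int freed_this_transaction = 4;   /* becomes free only after a commit */
    static int out_of_data_space;

    static void commit(void)
    {
        free_blocks += freed_this_transaction;
        freed_this_transaction = 0;
    }

    static int alloc_data_block(unsigned *result)
    {
        if (!free_blocks) {
            commit();                        /* may reclaim space */
            if (!free_blocks) {
                out_of_data_space = 1;       /* PM_OUT_OF_DATA_SPACE */
                return -1;                   /* -ENOSPC in the kernel */
            }
        }
        *result = (unsigned)--free_blocks;   /* hand out one block */
        return 0;
    }

    int main(void)
    {
        unsigned block;

        while (!alloc_data_block(&block))
            printf("allocated block, %d left\n", free_blocks);
        printf("out of data space: %d\n", out_of_data_space);
        return 0;
    }
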
1599 static blk_status_t should_error_unserviceable_bio(struct pool *pool)
1601 enum pool_mode m = get_pool_mode(pool);
1606 DMERR_LIMIT("bio unserviceable, yet pool is in PM_WRITE mode");
1610 return pool->pf.error_if_no_space ? BLK_STS_NOSPC : 0;
1618 DMERR_LIMIT("bio unserviceable, yet pool has an unknown mode");
1623 static void handle_unserviceable_bio(struct pool *pool, struct bio *bio)
1625 blk_status_t error = should_error_unserviceable_bio(pool);
1634 static void retry_bios_on_resume(struct pool *pool, struct dm_bio_prison_cell *cell)
1640 error = should_error_unserviceable_bio(pool);
1642 cell_error_with_code(pool, cell, error);
1647 cell_release(pool, cell, &bios);
1656 struct pool *pool = tc->pool;
1657 struct dm_thin_new_mapping *m = get_next_mapping(pool);
1669 if (!dm_deferred_set_add_work(pool->all_io_ds, &m->list))
1670 pool->process_prepared_discard(m);
1676 struct pool *pool = tc->pool;
1686 r = ensure_next_mapping(pool);
1701 if (bio_detain(tc->pool, &data_key, NULL, &data_cell)) {
1711 m = get_next_mapping(pool);
1729 if (!dm_deferred_set_add_work(pool->all_io_ds, &m->list))
1730 pool->process_prepared_discard(m);
1773 if (bio_detain(tc->pool, &virt_key, bio, &virt_cell))
1783 tc->pool->process_discard_cell(tc, virt_cell);
1793 struct pool *pool = tc->pool;
1803 retry_bios_on_resume(pool, cell);
1809 cell_error(pool, cell);
1827 h->shared_read_entry = dm_deferred_entry_inc(info->tc->pool->shared_read_ds);
1828 inc_all_io_entry(info->tc->pool, bio);
1845 cell_visit_release(tc->pool, __remap_and_issue_shared_cell,
1861 struct pool *pool = tc->pool;
1869 if (bio_detain(pool, &key, bio, &data_cell)) {
1880 h->shared_read_entry = dm_deferred_entry_inc(pool->shared_read_ds);
1881 inc_all_io_entry(pool, bio);
1894 struct pool *pool = tc->pool;
1900 inc_all_io_entry(pool, bio);
1927 retry_bios_on_resume(pool, cell);
1933 cell_error(pool, cell);
1941 struct pool *pool = tc->pool;
1947 cell_requeue(pool, cell);
1957 inc_all_io_entry(pool, bio);
1965 inc_all_io_entry(pool, bio);
1995 struct pool *pool = tc->pool;
2005 if (bio_detain(pool, &key, bio, &cell))
2023 handle_unserviceable_bio(tc->pool, bio);
2027 inc_all_io_entry(tc->pool, bio);
2038 handle_unserviceable_bio(tc->pool, bio);
2043 inc_all_io_entry(tc->pool, bio);
2084 cell_success(tc->pool, cell);
2089 cell_error(tc->pool, cell);
2096 static int need_commit_due_to_time(struct pool *pool)
2098 return !time_in_range(jiffies, pool->last_commit_jiffies,
2099 pool->last_commit_jiffies + COMMIT_PERIOD);
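
need_commit_due_to_time() bounds how stale uncommitted metadata can get by checking jiffies against last_commit_jiffies + COMMIT_PERIOD. A sketch of the same test using wall-clock time in place of jiffies and time_in_range():

    /* Sketch of the periodic-commit check; time(NULL) stands in for jiffies. */
    #include <stdio.h>
    #include <time.h>

    #define COMMIT_PERIOD 2     /* seconds here; HZ-based in the kernel */

    static time_t last_commit;

    static int need_commit_due_to_time(time_t now)
    {
        /* !time_in_range(now, last_commit, last_commit + COMMIT_PERIOD) */
        return now < last_commit || now > last_commit + COMMIT_PERIOD;
    }

    int main(void)
    {
        last_commit = time(NULL) - 5;   /* pretend the last commit is stale */
        printf("commit needed: %d\n", need_commit_due_to_time(time(NULL)));
        return 0;
    }
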
2168 struct pool *pool = tc->pool;
2203 if (ensure_next_mapping(pool)) {
2212 pool->process_discard(tc, bio);
2214 pool->process_bio(tc, bio);
2217 throttle_work_update(&pool->throttle);
2218 dm_pool_issue_prefetches(pool->pmd);
2242 static unsigned sort_cells(struct pool *pool, struct list_head *cells)
2251 pool->cell_sort_array[count++] = cell;
2255 sort(pool->cell_sort_array, count, sizeof(cell), cmp_cells, NULL);
2262 struct pool *pool = tc->pool;
2277 count = sort_cells(tc->pool, &cells);
2280 cell = pool->cell_sort_array[i];
2288 if (ensure_next_mapping(pool)) {
2290 list_add(&pool->cell_sort_array[j]->user_list, &cells);
2299 pool->process_discard_cell(tc, cell);
2301 pool->process_cell(tc, cell);
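
sort_cells() copies deferred cells into pool->cell_sort_array and sorts them (by the holder bio's start sector in the kernel) so the work loop services them in ascending order, which keeps the resulting I/O roughly sequential. A sketch with qsort() and bare sector numbers standing in for cells:

    /* Sketch of the cell-sort step; sectors stand in for prison cells. */
    #include <stdio.h>
    #include <stdlib.h>

    #define CELL_SORT_ARRAY_SIZE 8

    static int cmp_cells(const void *a, const void *b)
    {
        unsigned long long lhs = *(const unsigned long long *)a;
        unsigned long long rhs = *(const unsigned long long *)b;

        return (lhs > rhs) - (lhs < rhs);
    }

    int main(void)
    {
        unsigned long long cell_sort_array[CELL_SORT_ARRAY_SIZE] = { 900, 12, 512, 64 };
        unsigned count = 4;     /* sort_cells() is bounded by the array size */

        qsort(cell_sort_array, count, sizeof(cell_sort_array[0]), cmp_cells);
        for (unsigned i = 0; i < count; i++)
            printf("%llu ", cell_sort_array[i]);
        printf("\n");
        return 0;
    }
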
2315 static struct thin_c *get_first_thin(struct pool *pool)
2320 if (!list_empty(&pool->active_thins)) {
2321 tc = list_entry_rcu(pool->active_thins.next, struct thin_c, list);
2329 static struct thin_c *get_next_thin(struct pool *pool, struct thin_c *tc)
2334 list_for_each_entry_continue_rcu(tc, &pool->active_thins, list) {
2346 static void process_deferred_bios(struct pool *pool)
2352 tc = get_first_thin(pool);
2356 tc = get_next_thin(pool, tc);
2366 spin_lock_irq(&pool->lock);
2367 bio_list_merge(&bios, &pool->deferred_flush_bios);
2368 bio_list_init(&pool->deferred_flush_bios);
2370 bio_list_merge(&bio_completions, &pool->deferred_flush_completions);
2371 bio_list_init(&pool->deferred_flush_completions);
2372 spin_unlock_irq(&pool->lock);
2375 !(dm_pool_changed_this_transaction(pool->pmd) && need_commit_due_to_time(pool)))
2378 if (commit(pool)) {
2385 pool->last_commit_jiffies = jiffies;
2404 struct pool *pool = container_of(ws, struct pool, worker);
2406 throttle_work_start(&pool->throttle);
2407 dm_pool_issue_prefetches(pool->pmd);
2408 throttle_work_update(&pool->throttle);
2409 process_prepared(pool, &pool->prepared_mappings, &pool->process_prepared_mapping);
2410 throttle_work_update(&pool->throttle);
2411 process_prepared(pool, &pool->prepared_discards, &pool->process_prepared_discard);
2412 throttle_work_update(&pool->throttle);
2413 process_prepared(pool, &pool->prepared_discards_pt2, &pool->process_prepared_discard_pt2);
2414 throttle_work_update(&pool->throttle);
2415 process_deferred_bios(pool);
2416 throttle_work_complete(&pool->throttle);
2425 struct pool *pool = container_of(to_delayed_work(ws), struct pool, waker);
2426 wake_worker(pool);
2427 queue_delayed_work(pool->wq, &pool->waker, COMMIT_PERIOD);
2432 * timeout either the pool will have been resized (and thus back in
2437 struct pool *pool = container_of(to_delayed_work(ws), struct pool,
2440 if (get_pool_mode(pool) == PM_OUT_OF_DATA_SPACE && !pool->pf.error_if_no_space) {
2441 pool->pf.error_if_no_space = true;
2442 notify_of_pool_mode_change(pool);
2443 error_retry_list_with_code(pool, BLK_STS_NOSPC);
2464 static void pool_work_wait(struct pool_work *pw, struct pool *pool,
2469 queue_work(pool->wq, &pw->worker);
2505 pool_work_wait(&w.pw, tc->pool, fn);
2515 static void set_discard_callbacks(struct pool *pool)
2517 struct pool_c *pt = pool->ti->private;
2520 pool->process_discard_cell = process_discard_cell_passdown;
2521 pool->process_prepared_discard = process_prepared_discard_passdown_pt1;
2522 pool->process_prepared_discard_pt2 = process_prepared_discard_passdown_pt2;
2524 pool->process_discard_cell = process_discard_cell_no_passdown;
2525 pool->process_prepared_discard = process_prepared_discard_no_passdown;
2529 static void set_pool_mode(struct pool *pool, enum pool_mode new_mode)
2531 struct pool_c *pt = pool->ti->private;
2532 bool needs_check = dm_pool_metadata_needs_check(pool->pmd);
2533 enum pool_mode old_mode = get_pool_mode(pool);
2537 * Never allow the pool to transition to PM_WRITE mode if user
2541 DMERR("%s: unable to switch pool to write mode until repaired.",
2542 dm_device_name(pool->pool_md));
2551 * pool move out of the old mode.
2558 dm_pool_metadata_read_only(pool->pmd);
2559 pool->process_bio = process_bio_fail;
2560 pool->process_discard = process_bio_fail;
2561 pool->process_cell = process_cell_fail;
2562 pool->process_discard_cell = process_cell_fail;
2563 pool->process_prepared_mapping = process_prepared_mapping_fail;
2564 pool->process_prepared_discard = process_prepared_discard_fail;
2566 error_retry_list(pool);
2571 dm_pool_metadata_read_only(pool->pmd);
2572 pool->process_bio = process_bio_read_only;
2573 pool->process_discard = process_bio_success;
2574 pool->process_cell = process_cell_read_only;
2575 pool->process_discard_cell = process_cell_success;
2576 pool->process_prepared_mapping = process_prepared_mapping_fail;
2577 pool->process_prepared_discard = process_prepared_discard_success;
2579 error_retry_list(pool);
2585 * would trigger userland to extend the pool before we
2591 pool->out_of_data_space = true;
2592 pool->process_bio = process_bio_read_only;
2593 pool->process_discard = process_discard_bio;
2594 pool->process_cell = process_cell_read_only;
2595 pool->process_prepared_mapping = process_prepared_mapping;
2596 set_discard_callbacks(pool);
2598 if (!pool->pf.error_if_no_space && no_space_timeout)
2599 queue_delayed_work(pool->wq, &pool->no_space_timeout, no_space_timeout);
2604 cancel_delayed_work_sync(&pool->no_space_timeout);
2605 pool->out_of_data_space = false;
2606 pool->pf.error_if_no_space = pt->requested_pf.error_if_no_space;
2607 dm_pool_metadata_read_write(pool->pmd);
2608 pool->process_bio = process_bio;
2609 pool->process_discard = process_discard_bio;
2610 pool->process_cell = process_cell;
2611 pool->process_prepared_mapping = process_prepared_mapping;
2612 set_discard_callbacks(pool);
2616 pool->pf.mode = new_mode;
2618 * The pool mode may have changed, sync it so bind_control_target()
2624 notify_of_pool_mode_change(pool);
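
set_pool_mode() changes behaviour by installing a different set of process_* function pointers per mode, so the hot paths never test the mode for each bio. A compact sketch of that dispatch with two stand-in handlers and two modes (the real pool has several modes and several handler slots):

    /* Sketch of mode-dependent dispatch via function pointers. */
    #include <stdio.h>

    enum pool_mode { PM_WRITE, PM_FAIL };

    struct pool {
        enum pool_mode mode;
        void (*process_bio)(const char *what);
    };

    static void process_bio_normal(const char *what) { printf("mapping %s\n", what); }
    static void process_bio_fail(const char *what)   { printf("failing %s\n", what); }

    static void set_pool_mode(struct pool *pool, enum pool_mode new_mode)
    {
        switch (new_mode) {
        case PM_FAIL:
            pool->process_bio = process_bio_fail;
            break;
        case PM_WRITE:
            pool->process_bio = process_bio_normal;
            break;
        }
        pool->mode = new_mode;
    }

    int main(void)
    {
        struct pool pool;

        set_pool_mode(&pool, PM_WRITE);
        pool.process_bio("bio A");
        set_pool_mode(&pool, PM_FAIL);
        pool.process_bio("bio B");
        return 0;
    }
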
2627 static void abort_transaction(struct pool *pool)
2629 const char *dev_name = dm_device_name(pool->pool_md);
2632 if (dm_pool_abort_metadata(pool->pmd)) {
2634 set_pool_mode(pool, PM_FAIL);
2637 if (dm_pool_metadata_set_needs_check(pool->pmd)) {
2639 set_pool_mode(pool, PM_FAIL);
2643 static void metadata_operation_failed(struct pool *pool, const char *op, int r)
2646 dm_device_name(pool->pool_md), op, r);
2648 abort_transaction(pool);
2649 set_pool_mode(pool, PM_READ_ONLY);
2663 struct pool *pool = tc->pool;
2669 wake_worker(pool);
2674 struct pool *pool = tc->pool;
2676 throttle_lock(&pool->throttle);
2678 throttle_unlock(&pool->throttle);
2683 struct pool *pool = tc->pool;
2685 throttle_lock(&pool->throttle);
2689 throttle_unlock(&pool->throttle);
2691 wake_worker(pool);
2726 if (get_pool_mode(tc->pool) == PM_FAIL) {
2741 if (bio_detain(tc->pool, &key, bio, &virt_cell))
2771 if (bio_detain(tc->pool, &key, bio, &data_cell)) {
2776 inc_all_io_entry(tc->pool, bio);
2792 * pool is switched to fail-io mode.
2800 static void requeue_bios(struct pool *pool)
2805 list_for_each_entry_rcu(tc, &pool->active_thins, list) {
2815 * Binding of control targets to a pool object
2835 struct pool *pool = pt->pool;
2847 else if (data_limits->max_discard_sectors < pool->sectors_per_block)
2856 static int bind_control_target(struct pool *pool, struct dm_target *ti)
2861 * We want to make sure that a pool in PM_FAIL mode is never upgraded.
2863 enum pool_mode old_mode = get_pool_mode(pool);
2867 * Don't change the pool's mode until set_pool_mode() below.
2868 * Otherwise the pool's process_* function pointers may
2869 * not match the desired pool mode.
2873 pool->ti = ti;
2874 pool->pf = pt->adjusted_pf;
2875 pool->low_water_blocks = pt->low_water_blocks;
2877 set_pool_mode(pool, new_mode);
2882 static void unbind_control_target(struct pool *pool, struct dm_target *ti)
2884 if (pool->ti == ti)
2885 pool->ti = NULL;
2891 /* Initialize pool features. */
2901 static void __pool_destroy(struct pool *pool)
2903 __pool_table_remove(pool);
2905 vfree(pool->cell_sort_array);
2906 if (dm_pool_metadata_close(pool->pmd) < 0)
2909 dm_bio_prison_destroy(pool->prison);
2910 dm_kcopyd_client_destroy(pool->copier);
2912 cancel_delayed_work_sync(&pool->waker);
2913 cancel_delayed_work_sync(&pool->no_space_timeout);
2914 if (pool->wq)
2915 destroy_workqueue(pool->wq);
2917 if (pool->next_mapping)
2918 mempool_free(pool->next_mapping, &pool->mapping_pool);
2919 mempool_exit(&pool->mapping_pool);
2920 bio_uninit(&pool->flush_bio);
2921 dm_deferred_set_destroy(pool->shared_read_ds);
2922 dm_deferred_set_destroy(pool->all_io_ds);
2923 kfree(pool);
2928 static struct pool *pool_create(struct mapped_device *pool_md,
2936 struct pool *pool;
2943 return (struct pool *)pmd;
2946 pool = kzalloc(sizeof(*pool), GFP_KERNEL);
2947 if (!pool) {
2948 *error = "Error allocating memory for pool";
2953 pool->pmd = pmd;
2954 pool->sectors_per_block = block_size;
2956 pool->sectors_per_block_shift = -1;
2958 pool->sectors_per_block_shift = __ffs(block_size);
2959 pool->low_water_blocks = 0;
2960 pool_features_init(&pool->pf);
2961 pool->prison = dm_bio_prison_create();
2962 if (!pool->prison) {
2963 *error = "Error creating pool's bio prison";
2968 pool->copier = dm_kcopyd_client_create(&dm_kcopyd_throttle);
2969 if (IS_ERR(pool->copier)) {
2970 r = PTR_ERR(pool->copier);
2971 *error = "Error creating pool's kcopyd client";
2980 pool->wq = alloc_ordered_workqueue("dm-" DM_MSG_PREFIX, WQ_MEM_RECLAIM);
2981 if (!pool->wq) {
2982 *error = "Error creating pool's workqueue";
2987 throttle_init(&pool->throttle);
2988 INIT_WORK(&pool->worker, do_worker);
2989 INIT_DELAYED_WORK(&pool->waker, do_waker);
2990 INIT_DELAYED_WORK(&pool->no_space_timeout, do_no_space_timeout);
2991 spin_lock_init(&pool->lock);
2992 bio_list_init(&pool->deferred_flush_bios);
2993 bio_list_init(&pool->deferred_flush_completions);
2994 INIT_LIST_HEAD(&pool->prepared_mappings);
2995 INIT_LIST_HEAD(&pool->prepared_discards);
2996 INIT_LIST_HEAD(&pool->prepared_discards_pt2);
2997 INIT_LIST_HEAD(&pool->active_thins);
2998 pool->low_water_triggered = false;
2999 pool->suspended = true;
3000 pool->out_of_data_space = false;
3001 bio_init(&pool->flush_bio, NULL, 0);
3003 pool->shared_read_ds = dm_deferred_set_create();
3004 if (!pool->shared_read_ds) {
3005 *error = "Error creating pool's shared read deferred set";
3010 pool->all_io_ds = dm_deferred_set_create();
3011 if (!pool->all_io_ds) {
3012 *error = "Error creating pool's all io deferred set";
3017 pool->next_mapping = NULL;
3018 r = mempool_init_slab_pool(&pool->mapping_pool, MAPPING_POOL_SIZE,
3021 *error = "Error creating pool's mapping mempool";
3026 pool->cell_sort_array =
3028 sizeof(*pool->cell_sort_array)));
3029 if (!pool->cell_sort_array) {
3035 pool->ref_count = 1;
3036 pool->last_commit_jiffies = jiffies;
3037 pool->pool_md = pool_md;
3038 pool->md_dev = metadata_dev;
3039 pool->data_dev = data_dev;
3040 __pool_table_insert(pool);
3042 return pool;
3045 mempool_exit(&pool->mapping_pool);
3047 dm_deferred_set_destroy(pool->all_io_ds);
3049 dm_deferred_set_destroy(pool->shared_read_ds);
3051 destroy_workqueue(pool->wq);
3053 dm_kcopyd_client_destroy(pool->copier);
3055 dm_bio_prison_destroy(pool->prison);
3057 kfree(pool);
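
pool_create() acquires its resources in order and unwinds them in reverse on failure; only the cleanup calls appear in the listing, the goto labels do not. A sketch of the pattern with plain allocations standing in for the prison, kcopyd client and workqueue:

    /* Sketch of goto-style error unwinding; allocations are stand-ins. */
    #include <stdio.h>
    #include <stdlib.h>

    struct pool { void *prison, *copier, *wq; };

    static struct pool *pool_create(void)
    {
        struct pool *pool = calloc(1, sizeof(*pool));

        if (!pool)
            return NULL;

        pool->prison = malloc(32);
        if (!pool->prison)
            goto bad_prison;

        pool->copier = malloc(32);
        if (!pool->copier)
            goto bad_kcopyd_client;

        pool->wq = malloc(32);
        if (!pool->wq)
            goto bad_wq;

        return pool;

    bad_wq:                 /* release in reverse acquisition order */
        free(pool->copier);
    bad_kcopyd_client:
        free(pool->prison);
    bad_prison:
        free(pool);
        return NULL;
    }

    int main(void)
    {
        struct pool *pool = pool_create();

        printf("pool %screated\n", pool ? "" : "not ");
        if (pool) {
            free(pool->wq);
            free(pool->copier);
            free(pool->prison);
            free(pool);
        }
        return 0;
    }
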
3065 static void __pool_inc(struct pool *pool)
3068 pool->ref_count++;
3071 static void __pool_dec(struct pool *pool)
3074 BUG_ON(!pool->ref_count);
3075 if (!--pool->ref_count)
3076 __pool_destroy(pool);
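
__pool_inc()/__pool_dec() let one pool object be shared between the pool target and its thin targets; the last put destroys it. In the kernel the count is protected by the pool-table mutex; the sketch below is single-threaded:

    /* Sketch of the pool reference counting; single-threaded stand-in. */
    #include <assert.h>
    #include <stdio.h>
    #include <stdlib.h>

    struct pool { unsigned ref_count; };

    static void __pool_destroy(struct pool *pool)
    {
        printf("destroying pool\n");
        free(pool);
    }

    static void __pool_inc(struct pool *pool) { pool->ref_count++; }

    static void __pool_dec(struct pool *pool)
    {
        assert(pool->ref_count);        /* mirrors BUG_ON(!pool->ref_count) */
        if (!--pool->ref_count)
            __pool_destroy(pool);
    }

    int main(void)
    {
        struct pool *pool = calloc(1, sizeof(*pool));

        pool->ref_count = 1;    /* creation returns with one reference */
        __pool_inc(pool);       /* e.g. a thin target binds to the pool */
        __pool_dec(pool);
        __pool_dec(pool);       /* last put destroys the pool */
        return 0;
    }
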
3079 static struct pool *__pool_find(struct mapped_device *pool_md,
3085 struct pool *pool = __pool_table_lookup_metadata_dev(metadata_dev);
3087 if (pool) {
3088 if (pool->pool_md != pool_md) {
3089 *error = "metadata device already in use by a pool";
3092 if (pool->data_dev != data_dev) {
3093 *error = "data device already in use by a pool";
3096 __pool_inc(pool);
3099 pool = __pool_table_lookup(pool_md);
3100 if (pool) {
3101 if (pool->md_dev != metadata_dev || pool->data_dev != data_dev) {
3102 *error = "different pool cannot replace a pool";
3105 __pool_inc(pool);
3108 pool = pool_create(pool_md, metadata_dev, data_dev, block_size, read_only, error);
3113 return pool;
3125 unbind_control_target(pt->pool, ti);
3126 __pool_dec(pt->pool);
3142 {0, 4, "Invalid number of pool feature arguments"},
3175 ti->error = "Unrecognised pool feature requested";
3186 struct pool *pool = context;
3189 dm_device_name(pool->pool_md));
3191 dm_table_event(pool->ti->table);
3207 struct pool *pool = context;
3208 struct bio *flush_bio = &pool->flush_bio;
3211 bio_set_dev(flush_bio, pool->data_dev);
3269 * thin-pool <metadata dev> <data dev>
3278 * read_only: Don't allow any changes to be made to the pool metadata.
3285 struct pool *pool;
3316 * Set default pool features.
3360 pool = __pool_find(dm_table_get_md(ti->table), metadata_dev->bdev, data_dev->bdev,
3362 if (IS_ERR(pool)) {
3363 r = PTR_ERR(pool);
3370 * initial load. This would require a pool reload to trigger thin
3373 if (!pool_created && pf.discard_enabled != pool->pf.discard_enabled) {
3379 pt->pool = pool;
3389 * Only need to enable discards if the pool should pass
3398 * stacking of discard limits (this keeps the pool and
3405 r = dm_pool_register_metadata_threshold(pt->pool->pmd,
3408 pool);
3414 dm_pool_register_pre_commit_callback(pool->pmd,
3415 metadata_pre_commit_callback, pool);
3422 __pool_dec(pool);
3439 struct pool *pool = pt->pool;
3444 spin_lock_irq(&pool->lock);
3447 spin_unlock_irq(&pool->lock);
3456 struct pool *pool = pt->pool;
3462 (void) sector_div(data_size, pool->sectors_per_block);
3464 r = dm_pool_get_data_dev_size(pool->pmd, &sb_data_size);
3467 dm_device_name(pool->pool_md));
3472 DMERR("%s: pool target (%llu blocks) too small: expected %llu",
3473 dm_device_name(pool->pool_md),
3478 if (dm_pool_metadata_needs_check(pool->pmd)) {
3480 dm_device_name(pool->pool_md));
3486 dm_device_name(pool->pool_md),
3488 r = dm_pool_resize_data_dev(pool->pmd, data_size);
3490 metadata_operation_failed(pool, "dm_pool_resize_data_dev", r);
3504 struct pool *pool = pt->pool;
3509 metadata_dev_size = get_metadata_dev_size_in_blocks(pool->md_dev);
3511 r = dm_pool_get_metadata_dev_size(pool->pmd, &sb_metadata_dev_size);
3514 dm_device_name(pool->pool_md));
3520 dm_device_name(pool->pool_md),
3525 if (dm_pool_metadata_needs_check(pool->pmd)) {
3527 dm_device_name(pool->pool_md));
3531 warn_if_metadata_device_too_big(pool->md_dev);
3533 dm_device_name(pool->pool_md),
3536 if (get_pool_mode(pool) == PM_OUT_OF_METADATA_SPACE)
3537 set_pool_mode(pool, PM_WRITE);
3539 r = dm_pool_resize_metadata_dev(pool->pmd, metadata_dev_size);
3541 metadata_operation_failed(pool, "dm_pool_resize_metadata_dev", r);
3567 struct pool *pool = pt->pool;
3570 * Take control of the pool object.
3572 r = bind_control_target(pool, ti);
3585 (void) commit(pool);
3588 * When a thin-pool is PM_FAIL, it cannot be rebuilt if
3592 if (r && get_pool_mode(pool) == PM_FAIL)
3598 static void pool_suspend_active_thins(struct pool *pool)
3603 tc = get_first_thin(pool);
3606 tc = get_next_thin(pool, tc);
3610 static void pool_resume_active_thins(struct pool *pool)
3615 tc = get_first_thin(pool);
3618 tc = get_next_thin(pool, tc);
3625 struct pool *pool = pt->pool;
3631 requeue_bios(pool);
3632 pool_resume_active_thins(pool);
3634 spin_lock_irq(&pool->lock);
3635 pool->low_water_triggered = false;
3636 pool->suspended = false;
3637 spin_unlock_irq(&pool->lock);
3639 do_waker(&pool->waker.work);
3645 struct pool *pool = pt->pool;
3647 spin_lock_irq(&pool->lock);
3648 pool->suspended = true;
3649 spin_unlock_irq(&pool->lock);
3651 pool_suspend_active_thins(pool);
3657 struct pool *pool = pt->pool;
3659 pool_resume_active_thins(pool);
3661 spin_lock_irq(&pool->lock);
3662 pool->suspended = false;
3663 spin_unlock_irq(&pool->lock);
3669 struct pool *pool = pt->pool;
3671 cancel_delayed_work_sync(&pool->waker);
3672 cancel_delayed_work_sync(&pool->no_space_timeout);
3673 flush_workqueue(pool->wq);
3674 (void) commit(pool);
3700 static int process_create_thin_mesg(unsigned argc, char **argv, struct pool *pool)
3713 r = dm_pool_create_thin(pool->pmd, dev_id);
3723 static int process_create_snap_mesg(unsigned argc, char **argv, struct pool *pool)
3741 r = dm_pool_create_snap(pool->pmd, dev_id, origin_dev_id);
3751 static int process_delete_mesg(unsigned argc, char **argv, struct pool *pool)
3764 r = dm_pool_delete_thin_device(pool->pmd, dev_id);
3771 static int process_set_transaction_id_mesg(unsigned argc, char **argv, struct pool *pool)
3790 r = dm_pool_set_metadata_transaction_id(pool->pmd, old_id, new_id);
3800 static int process_reserve_metadata_snap_mesg(unsigned argc, char **argv, struct pool *pool)
3808 (void) commit(pool);
3810 r = dm_pool_reserve_metadata_snap(pool->pmd);
3817 static int process_release_metadata_snap_mesg(unsigned argc, char **argv, struct pool *pool)
3825 r = dm_pool_release_metadata_snap(pool->pmd);
3846 struct pool *pool = pt->pool;
3848 if (get_pool_mode(pool) >= PM_OUT_OF_METADATA_SPACE) {
3849 DMERR("%s: unable to service pool target messages in READ_ONLY or FAIL mode",
3850 dm_device_name(pool->pool_md));
3855 r = process_create_thin_mesg(argc, argv, pool);
3858 r = process_create_snap_mesg(argc, argv, pool);
3861 r = process_delete_mesg(argc, argv, pool);
3864 r = process_set_transaction_id_mesg(argc, argv, pool);
3867 r = process_reserve_metadata_snap_mesg(argc, argv, pool);
3870 r = process_release_metadata_snap_mesg(argc, argv, pool);
3873 DMWARN("Unrecognised thin pool target message received: %s", argv[0]);
3876 (void) commit(pool);
3909 * <pool mode> <discard config> <no space config> <needs_check>
3926 struct pool *pool = pt->pool;
3930 if (get_pool_mode(pool) == PM_FAIL) {
3937 (void) commit(pool);
3939 r = dm_pool_get_metadata_transaction_id(pool->pmd, &transaction_id);
3942 dm_device_name(pool->pool_md), r);
3946 r = dm_pool_get_free_metadata_block_count(pool->pmd, &nr_free_blocks_metadata);
3949 dm_device_name(pool->pool_md), r);
3953 r = dm_pool_get_metadata_dev_size(pool->pmd, &nr_blocks_metadata);
3956 dm_device_name(pool->pool_md), r);
3960 r = dm_pool_get_free_block_count(pool->pmd, &nr_free_blocks_data);
3963 dm_device_name(pool->pool_md), r);
3967 r = dm_pool_get_data_dev_size(pool->pmd, &nr_blocks_data);
3970 dm_device_name(pool->pool_md), r);
3974 r = dm_pool_get_metadata_snap(pool->pmd, &held_root);
3977 dm_device_name(pool->pool_md), r);
3993 mode = get_pool_mode(pool);
4001 if (!pool->pf.discard_enabled)
4003 else if (pool->pf.discard_passdown)
4008 if (pool->pf.error_if_no_space)
4013 if (dm_pool_metadata_needs_check(pool->pmd))
4026 (unsigned long)pool->sectors_per_block,
4048 struct pool *pool = pt->pool;
4052 * If max_sectors is smaller than pool->sectors_per_block adjust it
4053 * to the highest possible power-of-2 factor of pool->sectors_per_block.
4054 * This is especially beneficial when the pool's data device is a RAID
4055 * device that has a full stripe width that matches pool->sectors_per_block
4060 if (limits->max_sectors < pool->sectors_per_block) {
4061 while (!is_factor(pool->sectors_per_block, limits->max_sectors)) {
4070 * pool's blocksize (io_opt is a factor) do not override them.
4072 if (io_opt_sectors < pool->sectors_per_block ||
4073 !is_factor(io_opt_sectors, pool->sectors_per_block)) {
4074 if (is_factor(pool->sectors_per_block, limits->max_sectors))
4077 blk_limits_io_min(limits, pool->sectors_per_block << SECTOR_SHIFT);
4078 blk_limits_io_opt(limits, pool->sectors_per_block << SECTOR_SHIFT);
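
pool_io_hints() only shrinks max_sectors when it is smaller than the pool block size, and the comment says the goal is the highest power-of-2 factor of sectors_per_block. The kernel's exact adjustment loop is not in the listing; the sketch below simply computes the value the comment describes, assuming is_factor() means "divides evenly":

    /* Sketch of the max_sectors adjustment described by the comment above. */
    #include <stdio.h>

    typedef unsigned long long sector_t;

    static int is_factor(sector_t block_size, unsigned n)
    {
        return n && !(block_size % n);
    }

    static unsigned adjust_max_sectors(sector_t sectors_per_block, unsigned max_sectors)
    {
        unsigned f = 1;

        if (max_sectors >= sectors_per_block)
            return max_sectors;             /* nothing to adjust */
        /* largest power of two that both fits and divides the block size */
        while ((f << 1) <= max_sectors && is_factor(sectors_per_block, f << 1))
            f <<= 1;
        return f;
    }

    int main(void)
    {
        /* 3MiB pool blocks (6144 sectors), device caps I/O at 2560 sectors */
        printf("max_sectors 2560 -> %u\n", adjust_max_sectors(6144, 2560));
        return 0;
    }
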
4083 * They get transferred to the live pool in bind_control_target()
4089 * block layer will stack them if pool's data device has support.
4100 * The pool uses the same discard limits as the underlying data
4106 .name = "thin-pool",
4143 spin_lock_irq(&tc->pool->lock);
4145 spin_unlock_irq(&tc->pool->lock);
4153 __pool_dec(tc->pool);
4168 * pool_dev: the path to the pool (eg, /dev/mapper/my_pool)
4170 * origin_dev: a device external to the pool that should act as the origin
4172 * If the pool device has discards disabled, they get disabled for the thin
4220 ti->error = "Error opening pool device";
4233 ti->error = "Couldn't get pool mapped device";
4238 tc->pool = __pool_table_lookup(pool_md);
4239 if (!tc->pool) {
4240 ti->error = "Couldn't find pool object";
4244 __pool_inc(tc->pool);
4246 if (get_pool_mode(tc->pool) == PM_FAIL) {
4252 r = dm_pool_open_thin_device(tc->pool->pmd, tc->dev_id, &tc->td);
4258 r = dm_set_target_max_io_len(ti, tc->pool->sectors_per_block);
4267 /* In case the pool supports discards, pass them on. */
4268 if (tc->pool->pf.discard_enabled) {
4275 spin_lock_irq(&tc->pool->lock);
4276 if (tc->pool->suspended) {
4277 spin_unlock_irq(&tc->pool->lock);
4279 ti->error = "Unable to activate thin device while pool is suspended";
4285 list_add_tail_rcu(&tc->list, &tc->pool->active_thins);
4286 spin_unlock_irq(&tc->pool->lock);
4302 __pool_dec(tc->pool);
4332 struct pool *pool = h->tc->pool;
4338 spin_lock_irqsave(&pool->lock, flags);
4343 spin_unlock_irqrestore(&pool->lock, flags);
4350 spin_lock_irqsave(&pool->lock, flags);
4352 list_add_tail(&m->list, &pool->prepared_discards);
4353 spin_unlock_irqrestore(&pool->lock, flags);
4354 wake_worker(pool);
4405 if (get_pool_mode(tc->pool) == PM_FAIL) {
4427 DMEMIT("%llu ", mapped * tc->pool->sectors_per_block);
4430 tc->pool->sectors_per_block) - 1);
4456 struct pool *pool = tc->pool;
4460 * we follow a more convoluted path through to the pool's target.
4462 if (!pool->ti)
4465 blocks = pool->ti->len;
4466 (void) sector_div(blocks, pool->sectors_per_block);
4468 return fn(ti, tc->pool_dev, 0, pool->sectors_per_block * blocks, data);
4476 struct pool *pool = tc->pool;
4478 if (!pool->pf.discard_enabled)
4481 limits->discard_granularity = pool->sectors_per_block << SECTOR_SHIFT;