18c2ecf20Sopenharmony_ci/* 28c2ecf20Sopenharmony_ci * Copyright (C) 2004-2005 IBM Corp. All Rights Reserved. 38c2ecf20Sopenharmony_ci * Copyright (C) 2006-2009 NEC Corporation. 48c2ecf20Sopenharmony_ci * 58c2ecf20Sopenharmony_ci * dm-queue-length.c 68c2ecf20Sopenharmony_ci * 78c2ecf20Sopenharmony_ci * Module Author: Stefan Bader, IBM 88c2ecf20Sopenharmony_ci * Modified by: Kiyoshi Ueda, NEC 98c2ecf20Sopenharmony_ci * 108c2ecf20Sopenharmony_ci * This file is released under the GPL. 118c2ecf20Sopenharmony_ci * 128c2ecf20Sopenharmony_ci * queue-length path selector - choose a path with the least number of 138c2ecf20Sopenharmony_ci * in-flight I/Os. 148c2ecf20Sopenharmony_ci */ 158c2ecf20Sopenharmony_ci 168c2ecf20Sopenharmony_ci#include "dm.h" 178c2ecf20Sopenharmony_ci#include "dm-path-selector.h" 188c2ecf20Sopenharmony_ci 198c2ecf20Sopenharmony_ci#include <linux/slab.h> 208c2ecf20Sopenharmony_ci#include <linux/ctype.h> 218c2ecf20Sopenharmony_ci#include <linux/errno.h> 228c2ecf20Sopenharmony_ci#include <linux/module.h> 238c2ecf20Sopenharmony_ci#include <linux/atomic.h> 248c2ecf20Sopenharmony_ci 258c2ecf20Sopenharmony_ci#define DM_MSG_PREFIX "multipath queue-length" 268c2ecf20Sopenharmony_ci#define QL_MIN_IO 1 278c2ecf20Sopenharmony_ci#define QL_VERSION "0.2.0" 288c2ecf20Sopenharmony_ci 298c2ecf20Sopenharmony_cistruct selector { 308c2ecf20Sopenharmony_ci struct list_head valid_paths; 318c2ecf20Sopenharmony_ci struct list_head failed_paths; 328c2ecf20Sopenharmony_ci spinlock_t lock; 338c2ecf20Sopenharmony_ci}; 348c2ecf20Sopenharmony_ci 358c2ecf20Sopenharmony_cistruct path_info { 368c2ecf20Sopenharmony_ci struct list_head list; 378c2ecf20Sopenharmony_ci struct dm_path *path; 388c2ecf20Sopenharmony_ci unsigned repeat_count; 398c2ecf20Sopenharmony_ci atomic_t qlen; /* the number of in-flight I/Os */ 408c2ecf20Sopenharmony_ci}; 418c2ecf20Sopenharmony_ci 428c2ecf20Sopenharmony_cistatic struct selector *alloc_selector(void) 438c2ecf20Sopenharmony_ci{ 448c2ecf20Sopenharmony_ci struct selector *s = kmalloc(sizeof(*s), GFP_KERNEL); 458c2ecf20Sopenharmony_ci 468c2ecf20Sopenharmony_ci if (s) { 478c2ecf20Sopenharmony_ci INIT_LIST_HEAD(&s->valid_paths); 488c2ecf20Sopenharmony_ci INIT_LIST_HEAD(&s->failed_paths); 498c2ecf20Sopenharmony_ci spin_lock_init(&s->lock); 508c2ecf20Sopenharmony_ci } 518c2ecf20Sopenharmony_ci 528c2ecf20Sopenharmony_ci return s; 538c2ecf20Sopenharmony_ci} 548c2ecf20Sopenharmony_ci 558c2ecf20Sopenharmony_cistatic int ql_create(struct path_selector *ps, unsigned argc, char **argv) 568c2ecf20Sopenharmony_ci{ 578c2ecf20Sopenharmony_ci struct selector *s = alloc_selector(); 588c2ecf20Sopenharmony_ci 598c2ecf20Sopenharmony_ci if (!s) 608c2ecf20Sopenharmony_ci return -ENOMEM; 618c2ecf20Sopenharmony_ci 628c2ecf20Sopenharmony_ci ps->context = s; 638c2ecf20Sopenharmony_ci return 0; 648c2ecf20Sopenharmony_ci} 658c2ecf20Sopenharmony_ci 668c2ecf20Sopenharmony_cistatic void ql_free_paths(struct list_head *paths) 678c2ecf20Sopenharmony_ci{ 688c2ecf20Sopenharmony_ci struct path_info *pi, *next; 698c2ecf20Sopenharmony_ci 708c2ecf20Sopenharmony_ci list_for_each_entry_safe(pi, next, paths, list) { 718c2ecf20Sopenharmony_ci list_del(&pi->list); 728c2ecf20Sopenharmony_ci kfree(pi); 738c2ecf20Sopenharmony_ci } 748c2ecf20Sopenharmony_ci} 758c2ecf20Sopenharmony_ci 768c2ecf20Sopenharmony_cistatic void ql_destroy(struct path_selector *ps) 778c2ecf20Sopenharmony_ci{ 788c2ecf20Sopenharmony_ci struct selector *s = ps->context; 798c2ecf20Sopenharmony_ci 808c2ecf20Sopenharmony_ci ql_free_paths(&s->valid_paths); 818c2ecf20Sopenharmony_ci ql_free_paths(&s->failed_paths); 828c2ecf20Sopenharmony_ci kfree(s); 838c2ecf20Sopenharmony_ci ps->context = NULL; 848c2ecf20Sopenharmony_ci} 858c2ecf20Sopenharmony_ci 868c2ecf20Sopenharmony_cistatic int ql_status(struct path_selector *ps, struct dm_path *path, 878c2ecf20Sopenharmony_ci status_type_t type, char *result, unsigned maxlen) 888c2ecf20Sopenharmony_ci{ 898c2ecf20Sopenharmony_ci unsigned sz = 0; 908c2ecf20Sopenharmony_ci struct path_info *pi; 918c2ecf20Sopenharmony_ci 928c2ecf20Sopenharmony_ci /* When called with NULL path, return selector status/args. */ 938c2ecf20Sopenharmony_ci if (!path) 948c2ecf20Sopenharmony_ci DMEMIT("0 "); 958c2ecf20Sopenharmony_ci else { 968c2ecf20Sopenharmony_ci pi = path->pscontext; 978c2ecf20Sopenharmony_ci 988c2ecf20Sopenharmony_ci switch (type) { 998c2ecf20Sopenharmony_ci case STATUSTYPE_INFO: 1008c2ecf20Sopenharmony_ci DMEMIT("%d ", atomic_read(&pi->qlen)); 1018c2ecf20Sopenharmony_ci break; 1028c2ecf20Sopenharmony_ci case STATUSTYPE_TABLE: 1038c2ecf20Sopenharmony_ci DMEMIT("%u ", pi->repeat_count); 1048c2ecf20Sopenharmony_ci break; 1058c2ecf20Sopenharmony_ci } 1068c2ecf20Sopenharmony_ci } 1078c2ecf20Sopenharmony_ci 1088c2ecf20Sopenharmony_ci return sz; 1098c2ecf20Sopenharmony_ci} 1108c2ecf20Sopenharmony_ci 1118c2ecf20Sopenharmony_cistatic int ql_add_path(struct path_selector *ps, struct dm_path *path, 1128c2ecf20Sopenharmony_ci int argc, char **argv, char **error) 1138c2ecf20Sopenharmony_ci{ 1148c2ecf20Sopenharmony_ci struct selector *s = ps->context; 1158c2ecf20Sopenharmony_ci struct path_info *pi; 1168c2ecf20Sopenharmony_ci unsigned repeat_count = QL_MIN_IO; 1178c2ecf20Sopenharmony_ci char dummy; 1188c2ecf20Sopenharmony_ci unsigned long flags; 1198c2ecf20Sopenharmony_ci 1208c2ecf20Sopenharmony_ci /* 1218c2ecf20Sopenharmony_ci * Arguments: [<repeat_count>] 1228c2ecf20Sopenharmony_ci * <repeat_count>: The number of I/Os before switching path. 1238c2ecf20Sopenharmony_ci * If not given, default (QL_MIN_IO) is used. 1248c2ecf20Sopenharmony_ci */ 1258c2ecf20Sopenharmony_ci if (argc > 1) { 1268c2ecf20Sopenharmony_ci *error = "queue-length ps: incorrect number of arguments"; 1278c2ecf20Sopenharmony_ci return -EINVAL; 1288c2ecf20Sopenharmony_ci } 1298c2ecf20Sopenharmony_ci 1308c2ecf20Sopenharmony_ci if ((argc == 1) && (sscanf(argv[0], "%u%c", &repeat_count, &dummy) != 1)) { 1318c2ecf20Sopenharmony_ci *error = "queue-length ps: invalid repeat count"; 1328c2ecf20Sopenharmony_ci return -EINVAL; 1338c2ecf20Sopenharmony_ci } 1348c2ecf20Sopenharmony_ci 1358c2ecf20Sopenharmony_ci if (repeat_count > 1) { 1368c2ecf20Sopenharmony_ci DMWARN_LIMIT("repeat_count > 1 is deprecated, using 1 instead"); 1378c2ecf20Sopenharmony_ci repeat_count = 1; 1388c2ecf20Sopenharmony_ci } 1398c2ecf20Sopenharmony_ci 1408c2ecf20Sopenharmony_ci /* Allocate the path information structure */ 1418c2ecf20Sopenharmony_ci pi = kmalloc(sizeof(*pi), GFP_KERNEL); 1428c2ecf20Sopenharmony_ci if (!pi) { 1438c2ecf20Sopenharmony_ci *error = "queue-length ps: Error allocating path information"; 1448c2ecf20Sopenharmony_ci return -ENOMEM; 1458c2ecf20Sopenharmony_ci } 1468c2ecf20Sopenharmony_ci 1478c2ecf20Sopenharmony_ci pi->path = path; 1488c2ecf20Sopenharmony_ci pi->repeat_count = repeat_count; 1498c2ecf20Sopenharmony_ci atomic_set(&pi->qlen, 0); 1508c2ecf20Sopenharmony_ci 1518c2ecf20Sopenharmony_ci path->pscontext = pi; 1528c2ecf20Sopenharmony_ci 1538c2ecf20Sopenharmony_ci spin_lock_irqsave(&s->lock, flags); 1548c2ecf20Sopenharmony_ci list_add_tail(&pi->list, &s->valid_paths); 1558c2ecf20Sopenharmony_ci spin_unlock_irqrestore(&s->lock, flags); 1568c2ecf20Sopenharmony_ci 1578c2ecf20Sopenharmony_ci return 0; 1588c2ecf20Sopenharmony_ci} 1598c2ecf20Sopenharmony_ci 1608c2ecf20Sopenharmony_cistatic void ql_fail_path(struct path_selector *ps, struct dm_path *path) 1618c2ecf20Sopenharmony_ci{ 1628c2ecf20Sopenharmony_ci struct selector *s = ps->context; 1638c2ecf20Sopenharmony_ci struct path_info *pi = path->pscontext; 1648c2ecf20Sopenharmony_ci unsigned long flags; 1658c2ecf20Sopenharmony_ci 1668c2ecf20Sopenharmony_ci spin_lock_irqsave(&s->lock, flags); 1678c2ecf20Sopenharmony_ci list_move(&pi->list, &s->failed_paths); 1688c2ecf20Sopenharmony_ci spin_unlock_irqrestore(&s->lock, flags); 1698c2ecf20Sopenharmony_ci} 1708c2ecf20Sopenharmony_ci 1718c2ecf20Sopenharmony_cistatic int ql_reinstate_path(struct path_selector *ps, struct dm_path *path) 1728c2ecf20Sopenharmony_ci{ 1738c2ecf20Sopenharmony_ci struct selector *s = ps->context; 1748c2ecf20Sopenharmony_ci struct path_info *pi = path->pscontext; 1758c2ecf20Sopenharmony_ci unsigned long flags; 1768c2ecf20Sopenharmony_ci 1778c2ecf20Sopenharmony_ci spin_lock_irqsave(&s->lock, flags); 1788c2ecf20Sopenharmony_ci list_move_tail(&pi->list, &s->valid_paths); 1798c2ecf20Sopenharmony_ci spin_unlock_irqrestore(&s->lock, flags); 1808c2ecf20Sopenharmony_ci 1818c2ecf20Sopenharmony_ci return 0; 1828c2ecf20Sopenharmony_ci} 1838c2ecf20Sopenharmony_ci 1848c2ecf20Sopenharmony_ci/* 1858c2ecf20Sopenharmony_ci * Select a path having the minimum number of in-flight I/Os 1868c2ecf20Sopenharmony_ci */ 1878c2ecf20Sopenharmony_cistatic struct dm_path *ql_select_path(struct path_selector *ps, size_t nr_bytes) 1888c2ecf20Sopenharmony_ci{ 1898c2ecf20Sopenharmony_ci struct selector *s = ps->context; 1908c2ecf20Sopenharmony_ci struct path_info *pi = NULL, *best = NULL; 1918c2ecf20Sopenharmony_ci struct dm_path *ret = NULL; 1928c2ecf20Sopenharmony_ci unsigned long flags; 1938c2ecf20Sopenharmony_ci 1948c2ecf20Sopenharmony_ci spin_lock_irqsave(&s->lock, flags); 1958c2ecf20Sopenharmony_ci if (list_empty(&s->valid_paths)) 1968c2ecf20Sopenharmony_ci goto out; 1978c2ecf20Sopenharmony_ci 1988c2ecf20Sopenharmony_ci list_for_each_entry(pi, &s->valid_paths, list) { 1998c2ecf20Sopenharmony_ci if (!best || 2008c2ecf20Sopenharmony_ci (atomic_read(&pi->qlen) < atomic_read(&best->qlen))) 2018c2ecf20Sopenharmony_ci best = pi; 2028c2ecf20Sopenharmony_ci 2038c2ecf20Sopenharmony_ci if (!atomic_read(&best->qlen)) 2048c2ecf20Sopenharmony_ci break; 2058c2ecf20Sopenharmony_ci } 2068c2ecf20Sopenharmony_ci 2078c2ecf20Sopenharmony_ci if (!best) 2088c2ecf20Sopenharmony_ci goto out; 2098c2ecf20Sopenharmony_ci 2108c2ecf20Sopenharmony_ci /* Move most recently used to least preferred to evenly balance. */ 2118c2ecf20Sopenharmony_ci list_move_tail(&best->list, &s->valid_paths); 2128c2ecf20Sopenharmony_ci 2138c2ecf20Sopenharmony_ci ret = best->path; 2148c2ecf20Sopenharmony_ciout: 2158c2ecf20Sopenharmony_ci spin_unlock_irqrestore(&s->lock, flags); 2168c2ecf20Sopenharmony_ci return ret; 2178c2ecf20Sopenharmony_ci} 2188c2ecf20Sopenharmony_ci 2198c2ecf20Sopenharmony_cistatic int ql_start_io(struct path_selector *ps, struct dm_path *path, 2208c2ecf20Sopenharmony_ci size_t nr_bytes) 2218c2ecf20Sopenharmony_ci{ 2228c2ecf20Sopenharmony_ci struct path_info *pi = path->pscontext; 2238c2ecf20Sopenharmony_ci 2248c2ecf20Sopenharmony_ci atomic_inc(&pi->qlen); 2258c2ecf20Sopenharmony_ci 2268c2ecf20Sopenharmony_ci return 0; 2278c2ecf20Sopenharmony_ci} 2288c2ecf20Sopenharmony_ci 2298c2ecf20Sopenharmony_cistatic int ql_end_io(struct path_selector *ps, struct dm_path *path, 2308c2ecf20Sopenharmony_ci size_t nr_bytes, u64 start_time) 2318c2ecf20Sopenharmony_ci{ 2328c2ecf20Sopenharmony_ci struct path_info *pi = path->pscontext; 2338c2ecf20Sopenharmony_ci 2348c2ecf20Sopenharmony_ci atomic_dec(&pi->qlen); 2358c2ecf20Sopenharmony_ci 2368c2ecf20Sopenharmony_ci return 0; 2378c2ecf20Sopenharmony_ci} 2388c2ecf20Sopenharmony_ci 2398c2ecf20Sopenharmony_cistatic struct path_selector_type ql_ps = { 2408c2ecf20Sopenharmony_ci .name = "queue-length", 2418c2ecf20Sopenharmony_ci .module = THIS_MODULE, 2428c2ecf20Sopenharmony_ci .table_args = 1, 2438c2ecf20Sopenharmony_ci .info_args = 1, 2448c2ecf20Sopenharmony_ci .create = ql_create, 2458c2ecf20Sopenharmony_ci .destroy = ql_destroy, 2468c2ecf20Sopenharmony_ci .status = ql_status, 2478c2ecf20Sopenharmony_ci .add_path = ql_add_path, 2488c2ecf20Sopenharmony_ci .fail_path = ql_fail_path, 2498c2ecf20Sopenharmony_ci .reinstate_path = ql_reinstate_path, 2508c2ecf20Sopenharmony_ci .select_path = ql_select_path, 2518c2ecf20Sopenharmony_ci .start_io = ql_start_io, 2528c2ecf20Sopenharmony_ci .end_io = ql_end_io, 2538c2ecf20Sopenharmony_ci}; 2548c2ecf20Sopenharmony_ci 2558c2ecf20Sopenharmony_cistatic int __init dm_ql_init(void) 2568c2ecf20Sopenharmony_ci{ 2578c2ecf20Sopenharmony_ci int r = dm_register_path_selector(&ql_ps); 2588c2ecf20Sopenharmony_ci 2598c2ecf20Sopenharmony_ci if (r < 0) 2608c2ecf20Sopenharmony_ci DMERR("register failed %d", r); 2618c2ecf20Sopenharmony_ci 2628c2ecf20Sopenharmony_ci DMINFO("version " QL_VERSION " loaded"); 2638c2ecf20Sopenharmony_ci 2648c2ecf20Sopenharmony_ci return r; 2658c2ecf20Sopenharmony_ci} 2668c2ecf20Sopenharmony_ci 2678c2ecf20Sopenharmony_cistatic void __exit dm_ql_exit(void) 2688c2ecf20Sopenharmony_ci{ 2698c2ecf20Sopenharmony_ci int r = dm_unregister_path_selector(&ql_ps); 2708c2ecf20Sopenharmony_ci 2718c2ecf20Sopenharmony_ci if (r < 0) 2728c2ecf20Sopenharmony_ci DMERR("unregister failed %d", r); 2738c2ecf20Sopenharmony_ci} 2748c2ecf20Sopenharmony_ci 2758c2ecf20Sopenharmony_cimodule_init(dm_ql_init); 2768c2ecf20Sopenharmony_cimodule_exit(dm_ql_exit); 2778c2ecf20Sopenharmony_ci 2788c2ecf20Sopenharmony_ciMODULE_AUTHOR("Stefan Bader <Stefan.Bader at de.ibm.com>"); 2798c2ecf20Sopenharmony_ciMODULE_DESCRIPTION( 2808c2ecf20Sopenharmony_ci "(C) Copyright IBM Corp. 2004,2005 All Rights Reserved.\n" 2818c2ecf20Sopenharmony_ci DM_NAME " path selector to balance the number of in-flight I/Os" 2828c2ecf20Sopenharmony_ci); 2838c2ecf20Sopenharmony_ciMODULE_LICENSE("GPL"); 284