18c2ecf20Sopenharmony_ci/*
28c2ecf20Sopenharmony_ci * Copyright (C) 2004-2005 IBM Corp.  All Rights Reserved.
38c2ecf20Sopenharmony_ci * Copyright (C) 2006-2009 NEC Corporation.
48c2ecf20Sopenharmony_ci *
58c2ecf20Sopenharmony_ci * dm-queue-length.c
68c2ecf20Sopenharmony_ci *
78c2ecf20Sopenharmony_ci * Module Author: Stefan Bader, IBM
88c2ecf20Sopenharmony_ci * Modified by: Kiyoshi Ueda, NEC
98c2ecf20Sopenharmony_ci *
108c2ecf20Sopenharmony_ci * This file is released under the GPL.
118c2ecf20Sopenharmony_ci *
128c2ecf20Sopenharmony_ci * queue-length path selector - choose a path with the least number of
138c2ecf20Sopenharmony_ci * in-flight I/Os.
148c2ecf20Sopenharmony_ci */
158c2ecf20Sopenharmony_ci
168c2ecf20Sopenharmony_ci#include "dm.h"
178c2ecf20Sopenharmony_ci#include "dm-path-selector.h"
188c2ecf20Sopenharmony_ci
198c2ecf20Sopenharmony_ci#include <linux/slab.h>
208c2ecf20Sopenharmony_ci#include <linux/ctype.h>
218c2ecf20Sopenharmony_ci#include <linux/errno.h>
228c2ecf20Sopenharmony_ci#include <linux/module.h>
238c2ecf20Sopenharmony_ci#include <linux/atomic.h>
248c2ecf20Sopenharmony_ci
258c2ecf20Sopenharmony_ci#define DM_MSG_PREFIX	"multipath queue-length"
268c2ecf20Sopenharmony_ci#define QL_MIN_IO	1
278c2ecf20Sopenharmony_ci#define QL_VERSION	"0.2.0"
288c2ecf20Sopenharmony_ci
298c2ecf20Sopenharmony_cistruct selector {
308c2ecf20Sopenharmony_ci	struct list_head	valid_paths;
318c2ecf20Sopenharmony_ci	struct list_head	failed_paths;
328c2ecf20Sopenharmony_ci	spinlock_t lock;
338c2ecf20Sopenharmony_ci};
348c2ecf20Sopenharmony_ci
358c2ecf20Sopenharmony_cistruct path_info {
368c2ecf20Sopenharmony_ci	struct list_head	list;
378c2ecf20Sopenharmony_ci	struct dm_path		*path;
388c2ecf20Sopenharmony_ci	unsigned		repeat_count;
398c2ecf20Sopenharmony_ci	atomic_t		qlen;	/* the number of in-flight I/Os */
408c2ecf20Sopenharmony_ci};
418c2ecf20Sopenharmony_ci
428c2ecf20Sopenharmony_cistatic struct selector *alloc_selector(void)
438c2ecf20Sopenharmony_ci{
448c2ecf20Sopenharmony_ci	struct selector *s = kmalloc(sizeof(*s), GFP_KERNEL);
458c2ecf20Sopenharmony_ci
468c2ecf20Sopenharmony_ci	if (s) {
478c2ecf20Sopenharmony_ci		INIT_LIST_HEAD(&s->valid_paths);
488c2ecf20Sopenharmony_ci		INIT_LIST_HEAD(&s->failed_paths);
498c2ecf20Sopenharmony_ci		spin_lock_init(&s->lock);
508c2ecf20Sopenharmony_ci	}
518c2ecf20Sopenharmony_ci
528c2ecf20Sopenharmony_ci	return s;
538c2ecf20Sopenharmony_ci}
548c2ecf20Sopenharmony_ci
558c2ecf20Sopenharmony_cistatic int ql_create(struct path_selector *ps, unsigned argc, char **argv)
568c2ecf20Sopenharmony_ci{
578c2ecf20Sopenharmony_ci	struct selector *s = alloc_selector();
588c2ecf20Sopenharmony_ci
598c2ecf20Sopenharmony_ci	if (!s)
608c2ecf20Sopenharmony_ci		return -ENOMEM;
618c2ecf20Sopenharmony_ci
628c2ecf20Sopenharmony_ci	ps->context = s;
638c2ecf20Sopenharmony_ci	return 0;
648c2ecf20Sopenharmony_ci}
658c2ecf20Sopenharmony_ci
668c2ecf20Sopenharmony_cistatic void ql_free_paths(struct list_head *paths)
678c2ecf20Sopenharmony_ci{
688c2ecf20Sopenharmony_ci	struct path_info *pi, *next;
698c2ecf20Sopenharmony_ci
708c2ecf20Sopenharmony_ci	list_for_each_entry_safe(pi, next, paths, list) {
718c2ecf20Sopenharmony_ci		list_del(&pi->list);
728c2ecf20Sopenharmony_ci		kfree(pi);
738c2ecf20Sopenharmony_ci	}
748c2ecf20Sopenharmony_ci}
758c2ecf20Sopenharmony_ci
768c2ecf20Sopenharmony_cistatic void ql_destroy(struct path_selector *ps)
778c2ecf20Sopenharmony_ci{
788c2ecf20Sopenharmony_ci	struct selector *s = ps->context;
798c2ecf20Sopenharmony_ci
808c2ecf20Sopenharmony_ci	ql_free_paths(&s->valid_paths);
818c2ecf20Sopenharmony_ci	ql_free_paths(&s->failed_paths);
828c2ecf20Sopenharmony_ci	kfree(s);
838c2ecf20Sopenharmony_ci	ps->context = NULL;
848c2ecf20Sopenharmony_ci}
858c2ecf20Sopenharmony_ci
868c2ecf20Sopenharmony_cistatic int ql_status(struct path_selector *ps, struct dm_path *path,
878c2ecf20Sopenharmony_ci		     status_type_t type, char *result, unsigned maxlen)
888c2ecf20Sopenharmony_ci{
898c2ecf20Sopenharmony_ci	unsigned sz = 0;
908c2ecf20Sopenharmony_ci	struct path_info *pi;
918c2ecf20Sopenharmony_ci
928c2ecf20Sopenharmony_ci	/* When called with NULL path, return selector status/args. */
938c2ecf20Sopenharmony_ci	if (!path)
948c2ecf20Sopenharmony_ci		DMEMIT("0 ");
958c2ecf20Sopenharmony_ci	else {
968c2ecf20Sopenharmony_ci		pi = path->pscontext;
978c2ecf20Sopenharmony_ci
988c2ecf20Sopenharmony_ci		switch (type) {
998c2ecf20Sopenharmony_ci		case STATUSTYPE_INFO:
1008c2ecf20Sopenharmony_ci			DMEMIT("%d ", atomic_read(&pi->qlen));
1018c2ecf20Sopenharmony_ci			break;
1028c2ecf20Sopenharmony_ci		case STATUSTYPE_TABLE:
1038c2ecf20Sopenharmony_ci			DMEMIT("%u ", pi->repeat_count);
1048c2ecf20Sopenharmony_ci			break;
1058c2ecf20Sopenharmony_ci		}
1068c2ecf20Sopenharmony_ci	}
1078c2ecf20Sopenharmony_ci
1088c2ecf20Sopenharmony_ci	return sz;
1098c2ecf20Sopenharmony_ci}
1108c2ecf20Sopenharmony_ci
1118c2ecf20Sopenharmony_cistatic int ql_add_path(struct path_selector *ps, struct dm_path *path,
1128c2ecf20Sopenharmony_ci		       int argc, char **argv, char **error)
1138c2ecf20Sopenharmony_ci{
1148c2ecf20Sopenharmony_ci	struct selector *s = ps->context;
1158c2ecf20Sopenharmony_ci	struct path_info *pi;
1168c2ecf20Sopenharmony_ci	unsigned repeat_count = QL_MIN_IO;
1178c2ecf20Sopenharmony_ci	char dummy;
1188c2ecf20Sopenharmony_ci	unsigned long flags;
1198c2ecf20Sopenharmony_ci
1208c2ecf20Sopenharmony_ci	/*
1218c2ecf20Sopenharmony_ci	 * Arguments: [<repeat_count>]
1228c2ecf20Sopenharmony_ci	 * 	<repeat_count>: The number of I/Os before switching path.
1238c2ecf20Sopenharmony_ci	 * 			If not given, default (QL_MIN_IO) is used.
1248c2ecf20Sopenharmony_ci	 */
1258c2ecf20Sopenharmony_ci	if (argc > 1) {
1268c2ecf20Sopenharmony_ci		*error = "queue-length ps: incorrect number of arguments";
1278c2ecf20Sopenharmony_ci		return -EINVAL;
1288c2ecf20Sopenharmony_ci	}
1298c2ecf20Sopenharmony_ci
1308c2ecf20Sopenharmony_ci	if ((argc == 1) && (sscanf(argv[0], "%u%c", &repeat_count, &dummy) != 1)) {
1318c2ecf20Sopenharmony_ci		*error = "queue-length ps: invalid repeat count";
1328c2ecf20Sopenharmony_ci		return -EINVAL;
1338c2ecf20Sopenharmony_ci	}
1348c2ecf20Sopenharmony_ci
1358c2ecf20Sopenharmony_ci	if (repeat_count > 1) {
1368c2ecf20Sopenharmony_ci		DMWARN_LIMIT("repeat_count > 1 is deprecated, using 1 instead");
1378c2ecf20Sopenharmony_ci		repeat_count = 1;
1388c2ecf20Sopenharmony_ci	}
1398c2ecf20Sopenharmony_ci
1408c2ecf20Sopenharmony_ci	/* Allocate the path information structure */
1418c2ecf20Sopenharmony_ci	pi = kmalloc(sizeof(*pi), GFP_KERNEL);
1428c2ecf20Sopenharmony_ci	if (!pi) {
1438c2ecf20Sopenharmony_ci		*error = "queue-length ps: Error allocating path information";
1448c2ecf20Sopenharmony_ci		return -ENOMEM;
1458c2ecf20Sopenharmony_ci	}
1468c2ecf20Sopenharmony_ci
1478c2ecf20Sopenharmony_ci	pi->path = path;
1488c2ecf20Sopenharmony_ci	pi->repeat_count = repeat_count;
1498c2ecf20Sopenharmony_ci	atomic_set(&pi->qlen, 0);
1508c2ecf20Sopenharmony_ci
1518c2ecf20Sopenharmony_ci	path->pscontext = pi;
1528c2ecf20Sopenharmony_ci
1538c2ecf20Sopenharmony_ci	spin_lock_irqsave(&s->lock, flags);
1548c2ecf20Sopenharmony_ci	list_add_tail(&pi->list, &s->valid_paths);
1558c2ecf20Sopenharmony_ci	spin_unlock_irqrestore(&s->lock, flags);
1568c2ecf20Sopenharmony_ci
1578c2ecf20Sopenharmony_ci	return 0;
1588c2ecf20Sopenharmony_ci}
1598c2ecf20Sopenharmony_ci
1608c2ecf20Sopenharmony_cistatic void ql_fail_path(struct path_selector *ps, struct dm_path *path)
1618c2ecf20Sopenharmony_ci{
1628c2ecf20Sopenharmony_ci	struct selector *s = ps->context;
1638c2ecf20Sopenharmony_ci	struct path_info *pi = path->pscontext;
1648c2ecf20Sopenharmony_ci	unsigned long flags;
1658c2ecf20Sopenharmony_ci
1668c2ecf20Sopenharmony_ci	spin_lock_irqsave(&s->lock, flags);
1678c2ecf20Sopenharmony_ci	list_move(&pi->list, &s->failed_paths);
1688c2ecf20Sopenharmony_ci	spin_unlock_irqrestore(&s->lock, flags);
1698c2ecf20Sopenharmony_ci}
1708c2ecf20Sopenharmony_ci
1718c2ecf20Sopenharmony_cistatic int ql_reinstate_path(struct path_selector *ps, struct dm_path *path)
1728c2ecf20Sopenharmony_ci{
1738c2ecf20Sopenharmony_ci	struct selector *s = ps->context;
1748c2ecf20Sopenharmony_ci	struct path_info *pi = path->pscontext;
1758c2ecf20Sopenharmony_ci	unsigned long flags;
1768c2ecf20Sopenharmony_ci
1778c2ecf20Sopenharmony_ci	spin_lock_irqsave(&s->lock, flags);
1788c2ecf20Sopenharmony_ci	list_move_tail(&pi->list, &s->valid_paths);
1798c2ecf20Sopenharmony_ci	spin_unlock_irqrestore(&s->lock, flags);
1808c2ecf20Sopenharmony_ci
1818c2ecf20Sopenharmony_ci	return 0;
1828c2ecf20Sopenharmony_ci}
1838c2ecf20Sopenharmony_ci
1848c2ecf20Sopenharmony_ci/*
1858c2ecf20Sopenharmony_ci * Select a path having the minimum number of in-flight I/Os
1868c2ecf20Sopenharmony_ci */
1878c2ecf20Sopenharmony_cistatic struct dm_path *ql_select_path(struct path_selector *ps, size_t nr_bytes)
1888c2ecf20Sopenharmony_ci{
1898c2ecf20Sopenharmony_ci	struct selector *s = ps->context;
1908c2ecf20Sopenharmony_ci	struct path_info *pi = NULL, *best = NULL;
1918c2ecf20Sopenharmony_ci	struct dm_path *ret = NULL;
1928c2ecf20Sopenharmony_ci	unsigned long flags;
1938c2ecf20Sopenharmony_ci
1948c2ecf20Sopenharmony_ci	spin_lock_irqsave(&s->lock, flags);
1958c2ecf20Sopenharmony_ci	if (list_empty(&s->valid_paths))
1968c2ecf20Sopenharmony_ci		goto out;
1978c2ecf20Sopenharmony_ci
1988c2ecf20Sopenharmony_ci	list_for_each_entry(pi, &s->valid_paths, list) {
1998c2ecf20Sopenharmony_ci		if (!best ||
2008c2ecf20Sopenharmony_ci		    (atomic_read(&pi->qlen) < atomic_read(&best->qlen)))
2018c2ecf20Sopenharmony_ci			best = pi;
2028c2ecf20Sopenharmony_ci
2038c2ecf20Sopenharmony_ci		if (!atomic_read(&best->qlen))
2048c2ecf20Sopenharmony_ci			break;
2058c2ecf20Sopenharmony_ci	}
2068c2ecf20Sopenharmony_ci
2078c2ecf20Sopenharmony_ci	if (!best)
2088c2ecf20Sopenharmony_ci		goto out;
2098c2ecf20Sopenharmony_ci
2108c2ecf20Sopenharmony_ci	/* Move most recently used to least preferred to evenly balance. */
2118c2ecf20Sopenharmony_ci	list_move_tail(&best->list, &s->valid_paths);
2128c2ecf20Sopenharmony_ci
2138c2ecf20Sopenharmony_ci	ret = best->path;
2148c2ecf20Sopenharmony_ciout:
2158c2ecf20Sopenharmony_ci	spin_unlock_irqrestore(&s->lock, flags);
2168c2ecf20Sopenharmony_ci	return ret;
2178c2ecf20Sopenharmony_ci}
2188c2ecf20Sopenharmony_ci
2198c2ecf20Sopenharmony_cistatic int ql_start_io(struct path_selector *ps, struct dm_path *path,
2208c2ecf20Sopenharmony_ci		       size_t nr_bytes)
2218c2ecf20Sopenharmony_ci{
2228c2ecf20Sopenharmony_ci	struct path_info *pi = path->pscontext;
2238c2ecf20Sopenharmony_ci
2248c2ecf20Sopenharmony_ci	atomic_inc(&pi->qlen);
2258c2ecf20Sopenharmony_ci
2268c2ecf20Sopenharmony_ci	return 0;
2278c2ecf20Sopenharmony_ci}
2288c2ecf20Sopenharmony_ci
2298c2ecf20Sopenharmony_cistatic int ql_end_io(struct path_selector *ps, struct dm_path *path,
2308c2ecf20Sopenharmony_ci		     size_t nr_bytes, u64 start_time)
2318c2ecf20Sopenharmony_ci{
2328c2ecf20Sopenharmony_ci	struct path_info *pi = path->pscontext;
2338c2ecf20Sopenharmony_ci
2348c2ecf20Sopenharmony_ci	atomic_dec(&pi->qlen);
2358c2ecf20Sopenharmony_ci
2368c2ecf20Sopenharmony_ci	return 0;
2378c2ecf20Sopenharmony_ci}
2388c2ecf20Sopenharmony_ci
2398c2ecf20Sopenharmony_cistatic struct path_selector_type ql_ps = {
2408c2ecf20Sopenharmony_ci	.name		= "queue-length",
2418c2ecf20Sopenharmony_ci	.module		= THIS_MODULE,
2428c2ecf20Sopenharmony_ci	.table_args	= 1,
2438c2ecf20Sopenharmony_ci	.info_args	= 1,
2448c2ecf20Sopenharmony_ci	.create		= ql_create,
2458c2ecf20Sopenharmony_ci	.destroy	= ql_destroy,
2468c2ecf20Sopenharmony_ci	.status		= ql_status,
2478c2ecf20Sopenharmony_ci	.add_path	= ql_add_path,
2488c2ecf20Sopenharmony_ci	.fail_path	= ql_fail_path,
2498c2ecf20Sopenharmony_ci	.reinstate_path	= ql_reinstate_path,
2508c2ecf20Sopenharmony_ci	.select_path	= ql_select_path,
2518c2ecf20Sopenharmony_ci	.start_io	= ql_start_io,
2528c2ecf20Sopenharmony_ci	.end_io		= ql_end_io,
2538c2ecf20Sopenharmony_ci};
2548c2ecf20Sopenharmony_ci
2558c2ecf20Sopenharmony_cistatic int __init dm_ql_init(void)
2568c2ecf20Sopenharmony_ci{
2578c2ecf20Sopenharmony_ci	int r = dm_register_path_selector(&ql_ps);
2588c2ecf20Sopenharmony_ci
2598c2ecf20Sopenharmony_ci	if (r < 0)
2608c2ecf20Sopenharmony_ci		DMERR("register failed %d", r);
2618c2ecf20Sopenharmony_ci
2628c2ecf20Sopenharmony_ci	DMINFO("version " QL_VERSION " loaded");
2638c2ecf20Sopenharmony_ci
2648c2ecf20Sopenharmony_ci	return r;
2658c2ecf20Sopenharmony_ci}
2668c2ecf20Sopenharmony_ci
2678c2ecf20Sopenharmony_cistatic void __exit dm_ql_exit(void)
2688c2ecf20Sopenharmony_ci{
2698c2ecf20Sopenharmony_ci	int r = dm_unregister_path_selector(&ql_ps);
2708c2ecf20Sopenharmony_ci
2718c2ecf20Sopenharmony_ci	if (r < 0)
2728c2ecf20Sopenharmony_ci		DMERR("unregister failed %d", r);
2738c2ecf20Sopenharmony_ci}
2748c2ecf20Sopenharmony_ci
2758c2ecf20Sopenharmony_cimodule_init(dm_ql_init);
2768c2ecf20Sopenharmony_cimodule_exit(dm_ql_exit);
2778c2ecf20Sopenharmony_ci
2788c2ecf20Sopenharmony_ciMODULE_AUTHOR("Stefan Bader <Stefan.Bader at de.ibm.com>");
2798c2ecf20Sopenharmony_ciMODULE_DESCRIPTION(
2808c2ecf20Sopenharmony_ci	"(C) Copyright IBM Corp. 2004,2005   All Rights Reserved.\n"
2818c2ecf20Sopenharmony_ci	DM_NAME " path selector to balance the number of in-flight I/Os"
2828c2ecf20Sopenharmony_ci);
2838c2ecf20Sopenharmony_ciMODULE_LICENSE("GPL");
284