18c2ecf20Sopenharmony_ci===================
28c2ecf20Sopenharmony_ciBlock io priorities
38c2ecf20Sopenharmony_ci===================
48c2ecf20Sopenharmony_ci
58c2ecf20Sopenharmony_ci
68c2ecf20Sopenharmony_ciIntro
78c2ecf20Sopenharmony_ci-----
88c2ecf20Sopenharmony_ci
98c2ecf20Sopenharmony_ciWith the introduction of cfq v3 (aka cfq-ts or time sliced cfq), basic io
108c2ecf20Sopenharmony_cipriorities are supported for reads on files.  This enables users to io nice
118c2ecf20Sopenharmony_ciprocesses or process groups, similar to what has been possible with cpu
128c2ecf20Sopenharmony_cischeduling for ages.  This document mainly details the current possibilities
138c2ecf20Sopenharmony_ciwith cfq; other io schedulers do not support io priorities thus far.
148c2ecf20Sopenharmony_ci
158c2ecf20Sopenharmony_ciScheduling classes
168c2ecf20Sopenharmony_ci------------------
178c2ecf20Sopenharmony_ci
188c2ecf20Sopenharmony_ciCFQ implements three generic scheduling classes that determine how io is
198c2ecf20Sopenharmony_ciserved for a process.
208c2ecf20Sopenharmony_ci
IOPRIO_CLASS_RT: This is the realtime io class. This scheduling class is given
higher priority than any other in the system; processes from this class are
given first access to the disk every time. It therefore needs to be used with
some care, since a single RT io process can starve the entire system. Within
the RT class, there are 8 levels of class data that determine exactly how much
time this process needs the disk for on each service. In the future this might
change to be more directly mappable to performance, by passing in a wanted
data rate instead.
298c2ecf20Sopenharmony_ci
IOPRIO_CLASS_BE: This is the best-effort scheduling class, which is the default
for any process that hasn't set a specific io priority. The class data
determines how much io bandwidth the process will get; it is directly mappable
to the cpu nice levels, just more coarsely implemented. 0 is the highest
BE prio level, 7 is the lowest. The mapping between cpu nice level and io
nice level is determined as: io_nice = (cpu_nice + 20) / 5. For example, the
default cpu nice level of 0 maps to io nice level 4.
368c2ecf20Sopenharmony_ci
IOPRIO_CLASS_IDLE: This is the idle scheduling class; processes running at this
level only get io time when no one else needs the disk. The idle class has no
class data, since it doesn't really apply here.
408c2ecf20Sopenharmony_ci
418c2ecf20Sopenharmony_ciTools
428c2ecf20Sopenharmony_ci-----
438c2ecf20Sopenharmony_ci
448c2ecf20Sopenharmony_ciSee below for a sample ionice tool. Usage::
458c2ecf20Sopenharmony_ci
468c2ecf20Sopenharmony_ci	# ionice -c<class> -n<level> -p<pid>
478c2ecf20Sopenharmony_ci
488c2ecf20Sopenharmony_ciIf pid isn't given, the current process is assumed. IO priority settings
498c2ecf20Sopenharmony_ciare inherited on fork, so you can use ionice to start the process at a given
508c2ecf20Sopenharmony_cilevel::
518c2ecf20Sopenharmony_ci
528c2ecf20Sopenharmony_ci	# ionice -c2 -n0 /bin/ls
538c2ecf20Sopenharmony_ci
548c2ecf20Sopenharmony_ciwill run ls at the best-effort scheduling class at the highest priority.
558c2ecf20Sopenharmony_ciFor a running process, you can give the pid instead::
568c2ecf20Sopenharmony_ci
578c2ecf20Sopenharmony_ci	# ionice -c1 -n2 -p100
588c2ecf20Sopenharmony_ci
598c2ecf20Sopenharmony_ciwill change pid 100 to run at the realtime scheduling class, at priority 2.
608c2ecf20Sopenharmony_ci
618c2ecf20Sopenharmony_ciionice.c tool::
628c2ecf20Sopenharmony_ci
638c2ecf20Sopenharmony_ci  #include <stdio.h>
648c2ecf20Sopenharmony_ci  #include <stdlib.h>
658c2ecf20Sopenharmony_ci  #include <errno.h>
668c2ecf20Sopenharmony_ci  #include <getopt.h>
678c2ecf20Sopenharmony_ci  #include <unistd.h>
688c2ecf20Sopenharmony_ci  #include <sys/ptrace.h>
698c2ecf20Sopenharmony_ci  #include <asm/unistd.h>
708c2ecf20Sopenharmony_ci
718c2ecf20Sopenharmony_ci  extern int sys_ioprio_set(int, int, int);
728c2ecf20Sopenharmony_ci  extern int sys_ioprio_get(int, int);
738c2ecf20Sopenharmony_ci
748c2ecf20Sopenharmony_ci  #if defined(__i386__)
758c2ecf20Sopenharmony_ci  #define __NR_ioprio_set		289
768c2ecf20Sopenharmony_ci  #define __NR_ioprio_get		290
778c2ecf20Sopenharmony_ci  #elif defined(__ppc__)
788c2ecf20Sopenharmony_ci  #define __NR_ioprio_set		273
798c2ecf20Sopenharmony_ci  #define __NR_ioprio_get		274
808c2ecf20Sopenharmony_ci  #elif defined(__x86_64__)
818c2ecf20Sopenharmony_ci  #define __NR_ioprio_set		251
828c2ecf20Sopenharmony_ci  #define __NR_ioprio_get		252
838c2ecf20Sopenharmony_ci  #elif defined(__ia64__)
848c2ecf20Sopenharmony_ci  #define __NR_ioprio_set		1274
858c2ecf20Sopenharmony_ci  #define __NR_ioprio_get		1275
868c2ecf20Sopenharmony_ci  #else
878c2ecf20Sopenharmony_ci  #error "Unsupported arch"
888c2ecf20Sopenharmony_ci  #endif
898c2ecf20Sopenharmony_ci
908c2ecf20Sopenharmony_ci  static inline int ioprio_set(int which, int who, int ioprio)
918c2ecf20Sopenharmony_ci  {
928c2ecf20Sopenharmony_ci	return syscall(__NR_ioprio_set, which, who, ioprio);
938c2ecf20Sopenharmony_ci  }
948c2ecf20Sopenharmony_ci
958c2ecf20Sopenharmony_ci  static inline int ioprio_get(int which, int who)
968c2ecf20Sopenharmony_ci  {
978c2ecf20Sopenharmony_ci	return syscall(__NR_ioprio_get, which, who);
988c2ecf20Sopenharmony_ci  }
998c2ecf20Sopenharmony_ci
1008c2ecf20Sopenharmony_ci  enum {
1018c2ecf20Sopenharmony_ci	IOPRIO_CLASS_NONE,
1028c2ecf20Sopenharmony_ci	IOPRIO_CLASS_RT,
1038c2ecf20Sopenharmony_ci	IOPRIO_CLASS_BE,
1048c2ecf20Sopenharmony_ci	IOPRIO_CLASS_IDLE,
1058c2ecf20Sopenharmony_ci  };
1068c2ecf20Sopenharmony_ci
1078c2ecf20Sopenharmony_ci  enum {
1088c2ecf20Sopenharmony_ci	IOPRIO_WHO_PROCESS = 1,
1098c2ecf20Sopenharmony_ci	IOPRIO_WHO_PGRP,
1108c2ecf20Sopenharmony_ci	IOPRIO_WHO_USER,
1118c2ecf20Sopenharmony_ci  };
1128c2ecf20Sopenharmony_ci
1138c2ecf20Sopenharmony_ci  #define IOPRIO_CLASS_SHIFT	13
1148c2ecf20Sopenharmony_ci
1158c2ecf20Sopenharmony_ci  const char *to_prio[] = { "none", "realtime", "best-effort", "idle", };
1168c2ecf20Sopenharmony_ci
1178c2ecf20Sopenharmony_ci  int main(int argc, char *argv[])
1188c2ecf20Sopenharmony_ci  {
1198c2ecf20Sopenharmony_ci	int ioprio = 4, set = 0, ioprio_class = IOPRIO_CLASS_BE;
1208c2ecf20Sopenharmony_ci	int c, pid = 0;
1218c2ecf20Sopenharmony_ci
1228c2ecf20Sopenharmony_ci	while ((c = getopt(argc, argv, "+n:c:p:")) != EOF) {
1238c2ecf20Sopenharmony_ci		switch (c) {
1248c2ecf20Sopenharmony_ci		case 'n':
1258c2ecf20Sopenharmony_ci			ioprio = strtol(optarg, NULL, 10);
1268c2ecf20Sopenharmony_ci			set = 1;
1278c2ecf20Sopenharmony_ci			break;
1288c2ecf20Sopenharmony_ci		case 'c':
1298c2ecf20Sopenharmony_ci			ioprio_class = strtol(optarg, NULL, 10);
1308c2ecf20Sopenharmony_ci			set = 1;
1318c2ecf20Sopenharmony_ci			break;
1328c2ecf20Sopenharmony_ci		case 'p':
1338c2ecf20Sopenharmony_ci			pid = strtol(optarg, NULL, 10);
1348c2ecf20Sopenharmony_ci			break;
1358c2ecf20Sopenharmony_ci		}
1368c2ecf20Sopenharmony_ci	}
1378c2ecf20Sopenharmony_ci
1388c2ecf20Sopenharmony_ci	switch (ioprio_class) {
1398c2ecf20Sopenharmony_ci		case IOPRIO_CLASS_NONE:
1408c2ecf20Sopenharmony_ci			ioprio_class = IOPRIO_CLASS_BE;
1418c2ecf20Sopenharmony_ci			break;
1428c2ecf20Sopenharmony_ci		case IOPRIO_CLASS_RT:
1438c2ecf20Sopenharmony_ci		case IOPRIO_CLASS_BE:
1448c2ecf20Sopenharmony_ci			break;
1458c2ecf20Sopenharmony_ci		case IOPRIO_CLASS_IDLE:
1468c2ecf20Sopenharmony_ci			ioprio = 7;
1478c2ecf20Sopenharmony_ci			break;
1488c2ecf20Sopenharmony_ci		default:
1498c2ecf20Sopenharmony_ci			printf("bad prio class %d\n", ioprio_class);
1508c2ecf20Sopenharmony_ci			return 1;
1518c2ecf20Sopenharmony_ci	}
1528c2ecf20Sopenharmony_ci
1538c2ecf20Sopenharmony_ci	if (!set) {
1548c2ecf20Sopenharmony_ci		if (!pid && argv[optind])
1558c2ecf20Sopenharmony_ci			pid = strtol(argv[optind], NULL, 10);
1568c2ecf20Sopenharmony_ci
1578c2ecf20Sopenharmony_ci		ioprio = ioprio_get(IOPRIO_WHO_PROCESS, pid);
1588c2ecf20Sopenharmony_ci
1598c2ecf20Sopenharmony_ci		printf("pid=%d, %d\n", pid, ioprio);
1608c2ecf20Sopenharmony_ci
1618c2ecf20Sopenharmony_ci		if (ioprio == -1)
1628c2ecf20Sopenharmony_ci			perror("ioprio_get");
1638c2ecf20Sopenharmony_ci		else {
1648c2ecf20Sopenharmony_ci			ioprio_class = ioprio >> IOPRIO_CLASS_SHIFT;
1658c2ecf20Sopenharmony_ci			ioprio = ioprio & 0xff;
1668c2ecf20Sopenharmony_ci			printf("%s: prio %d\n", to_prio[ioprio_class], ioprio);
1678c2ecf20Sopenharmony_ci		}
1688c2ecf20Sopenharmony_ci	} else {
1698c2ecf20Sopenharmony_ci		if (ioprio_set(IOPRIO_WHO_PROCESS, pid, ioprio | ioprio_class << IOPRIO_CLASS_SHIFT) == -1) {
1708c2ecf20Sopenharmony_ci			perror("ioprio_set");
1718c2ecf20Sopenharmony_ci			return 1;
1728c2ecf20Sopenharmony_ci		}
1738c2ecf20Sopenharmony_ci
1748c2ecf20Sopenharmony_ci		if (argv[optind])
1758c2ecf20Sopenharmony_ci			execvp(argv[optind], &argv[optind]);
1768c2ecf20Sopenharmony_ci	}
1778c2ecf20Sopenharmony_ci
1788c2ecf20Sopenharmony_ci	return 0;
1798c2ecf20Sopenharmony_ci  }
1808c2ecf20Sopenharmony_ci
1818c2ecf20Sopenharmony_ci
1828c2ecf20Sopenharmony_ciMarch 11 2005, Jens Axboe <jens.axboe@oracle.com>
183