162306a36Sopenharmony_ci===================
262306a36Sopenharmony_ciBlock io priorities
362306a36Sopenharmony_ci===================
462306a36Sopenharmony_ci
562306a36Sopenharmony_ci
662306a36Sopenharmony_ciIntro
762306a36Sopenharmony_ci-----
862306a36Sopenharmony_ci
962306a36Sopenharmony_ciWith the introduction of cfq v3 (aka cfq-ts or time sliced cfq), basic io
1062306a36Sopenharmony_cipriorities are supported for reads on files.  This enables users to io nice
1162306a36Sopenharmony_ciprocesses or process groups, similar to what has been possible with cpu
1262306a36Sopenharmony_cischeduling for ages.  This document mainly details the current possibilities
1362306a36Sopenharmony_ciwith cfq; other io schedulers do not support io priorities thus far.
1462306a36Sopenharmony_ci
1562306a36Sopenharmony_ciScheduling classes
1662306a36Sopenharmony_ci------------------
1762306a36Sopenharmony_ci
1862306a36Sopenharmony_ciCFQ implements three generic scheduling classes that determine how io is
1962306a36Sopenharmony_ciserved for a process.
2062306a36Sopenharmony_ci
IOPRIO_CLASS_RT: This is the realtime io class. This scheduling class is given
higher priority than any other in the system; processes from this class are
given first access to the disk every time. Thus it needs to be used with some
care, since one io RT process can starve the entire system. Within the RT class,
there are 8 levels of class data that determine exactly how much time this
process needs the disk for on each service. In the future this might change
to be more directly mappable to performance, by passing in a wanted data
rate instead.
2962306a36Sopenharmony_ci
IOPRIO_CLASS_BE: This is the best-effort scheduling class, which is the default
for any process that hasn't set a specific io priority. The class data
determines how much io bandwidth the process will get; it's directly mappable
to the cpu nice levels, just more coarsely implemented. 0 is the highest
BE prio level, 7 is the lowest. The mapping between cpu nice level and io
nice level is determined as: io_nice = (cpu_nice + 20) / 5. For example, the
default cpu nice level of 0 maps to io nice level 4.
3662306a36Sopenharmony_ci
IOPRIO_CLASS_IDLE: This is the idle scheduling class. Processes running at this
level only get io time when no one else needs the disk. The idle class has no
class data, since it doesn't really apply here.
4062306a36Sopenharmony_ci
4162306a36Sopenharmony_ciTools
4262306a36Sopenharmony_ci-----
4362306a36Sopenharmony_ci
4462306a36Sopenharmony_ciSee below for a sample ionice tool. Usage::
4562306a36Sopenharmony_ci
4662306a36Sopenharmony_ci	# ionice -c<class> -n<level> -p<pid>
4762306a36Sopenharmony_ci
4862306a36Sopenharmony_ciIf pid isn't given, the current process is assumed. IO priority settings
4962306a36Sopenharmony_ciare inherited on fork, so you can use ionice to start the process at a given
5062306a36Sopenharmony_cilevel::
5162306a36Sopenharmony_ci
5262306a36Sopenharmony_ci	# ionice -c2 -n0 /bin/ls
5362306a36Sopenharmony_ci
5462306a36Sopenharmony_ciwill run ls at the best-effort scheduling class at the highest priority.
5562306a36Sopenharmony_ciFor a running process, you can give the pid instead::
5662306a36Sopenharmony_ci
5762306a36Sopenharmony_ci	# ionice -c1 -n2 -p100
5862306a36Sopenharmony_ci
5962306a36Sopenharmony_ciwill change pid 100 to run at the realtime scheduling class, at priority 2.
6062306a36Sopenharmony_ci
6162306a36Sopenharmony_ciionice.c tool::
6262306a36Sopenharmony_ci
6362306a36Sopenharmony_ci  #include <stdio.h>
6462306a36Sopenharmony_ci  #include <stdlib.h>
6562306a36Sopenharmony_ci  #include <errno.h>
6662306a36Sopenharmony_ci  #include <getopt.h>
6762306a36Sopenharmony_ci  #include <unistd.h>
6862306a36Sopenharmony_ci  #include <sys/ptrace.h>
6962306a36Sopenharmony_ci  #include <asm/unistd.h>
7062306a36Sopenharmony_ci
7162306a36Sopenharmony_ci  extern int sys_ioprio_set(int, int, int);
7262306a36Sopenharmony_ci  extern int sys_ioprio_get(int, int);
7362306a36Sopenharmony_ci
7462306a36Sopenharmony_ci  #if defined(__i386__)
7562306a36Sopenharmony_ci  #define __NR_ioprio_set		289
7662306a36Sopenharmony_ci  #define __NR_ioprio_get		290
7762306a36Sopenharmony_ci  #elif defined(__ppc__)
7862306a36Sopenharmony_ci  #define __NR_ioprio_set		273
7962306a36Sopenharmony_ci  #define __NR_ioprio_get		274
8062306a36Sopenharmony_ci  #elif defined(__x86_64__)
8162306a36Sopenharmony_ci  #define __NR_ioprio_set		251
8262306a36Sopenharmony_ci  #define __NR_ioprio_get		252
8362306a36Sopenharmony_ci  #elif defined(__ia64__)
8462306a36Sopenharmony_ci  #define __NR_ioprio_set		1274
8562306a36Sopenharmony_ci  #define __NR_ioprio_get		1275
8662306a36Sopenharmony_ci  #else
8762306a36Sopenharmony_ci  #error "Unsupported arch"
8862306a36Sopenharmony_ci  #endif
8962306a36Sopenharmony_ci
9062306a36Sopenharmony_ci  static inline int ioprio_set(int which, int who, int ioprio)
9162306a36Sopenharmony_ci  {
9262306a36Sopenharmony_ci	return syscall(__NR_ioprio_set, which, who, ioprio);
9362306a36Sopenharmony_ci  }
9462306a36Sopenharmony_ci
9562306a36Sopenharmony_ci  static inline int ioprio_get(int which, int who)
9662306a36Sopenharmony_ci  {
9762306a36Sopenharmony_ci	return syscall(__NR_ioprio_get, which, who);
9862306a36Sopenharmony_ci  }
9962306a36Sopenharmony_ci
10062306a36Sopenharmony_ci  enum {
10162306a36Sopenharmony_ci	IOPRIO_CLASS_NONE,
10262306a36Sopenharmony_ci	IOPRIO_CLASS_RT,
10362306a36Sopenharmony_ci	IOPRIO_CLASS_BE,
10462306a36Sopenharmony_ci	IOPRIO_CLASS_IDLE,
10562306a36Sopenharmony_ci  };
10662306a36Sopenharmony_ci
10762306a36Sopenharmony_ci  enum {
10862306a36Sopenharmony_ci	IOPRIO_WHO_PROCESS = 1,
10962306a36Sopenharmony_ci	IOPRIO_WHO_PGRP,
11062306a36Sopenharmony_ci	IOPRIO_WHO_USER,
11162306a36Sopenharmony_ci  };
11262306a36Sopenharmony_ci
11362306a36Sopenharmony_ci  #define IOPRIO_CLASS_SHIFT	13
11462306a36Sopenharmony_ci
11562306a36Sopenharmony_ci  const char *to_prio[] = { "none", "realtime", "best-effort", "idle", };
11662306a36Sopenharmony_ci
11762306a36Sopenharmony_ci  int main(int argc, char *argv[])
11862306a36Sopenharmony_ci  {
11962306a36Sopenharmony_ci	int ioprio = 4, set = 0, ioprio_class = IOPRIO_CLASS_BE;
12062306a36Sopenharmony_ci	int c, pid = 0;
12162306a36Sopenharmony_ci
12262306a36Sopenharmony_ci	while ((c = getopt(argc, argv, "+n:c:p:")) != EOF) {
12362306a36Sopenharmony_ci		switch (c) {
12462306a36Sopenharmony_ci		case 'n':
12562306a36Sopenharmony_ci			ioprio = strtol(optarg, NULL, 10);
12662306a36Sopenharmony_ci			set = 1;
12762306a36Sopenharmony_ci			break;
12862306a36Sopenharmony_ci		case 'c':
12962306a36Sopenharmony_ci			ioprio_class = strtol(optarg, NULL, 10);
13062306a36Sopenharmony_ci			set = 1;
13162306a36Sopenharmony_ci			break;
13262306a36Sopenharmony_ci		case 'p':
13362306a36Sopenharmony_ci			pid = strtol(optarg, NULL, 10);
13462306a36Sopenharmony_ci			break;
13562306a36Sopenharmony_ci		}
13662306a36Sopenharmony_ci	}
13762306a36Sopenharmony_ci
13862306a36Sopenharmony_ci	switch (ioprio_class) {
13962306a36Sopenharmony_ci		case IOPRIO_CLASS_NONE:
14062306a36Sopenharmony_ci			ioprio_class = IOPRIO_CLASS_BE;
14162306a36Sopenharmony_ci			break;
14262306a36Sopenharmony_ci		case IOPRIO_CLASS_RT:
14362306a36Sopenharmony_ci		case IOPRIO_CLASS_BE:
14462306a36Sopenharmony_ci			break;
14562306a36Sopenharmony_ci		case IOPRIO_CLASS_IDLE:
14662306a36Sopenharmony_ci			ioprio = 7;
14762306a36Sopenharmony_ci			break;
14862306a36Sopenharmony_ci		default:
14962306a36Sopenharmony_ci			printf("bad prio class %d\n", ioprio_class);
15062306a36Sopenharmony_ci			return 1;
15162306a36Sopenharmony_ci	}
15262306a36Sopenharmony_ci
15362306a36Sopenharmony_ci	if (!set) {
15462306a36Sopenharmony_ci		if (!pid && argv[optind])
15562306a36Sopenharmony_ci			pid = strtol(argv[optind], NULL, 10);
15662306a36Sopenharmony_ci
15762306a36Sopenharmony_ci		ioprio = ioprio_get(IOPRIO_WHO_PROCESS, pid);
15862306a36Sopenharmony_ci
15962306a36Sopenharmony_ci		printf("pid=%d, %d\n", pid, ioprio);
16062306a36Sopenharmony_ci
16162306a36Sopenharmony_ci		if (ioprio == -1)
16262306a36Sopenharmony_ci			perror("ioprio_get");
16362306a36Sopenharmony_ci		else {
16462306a36Sopenharmony_ci			ioprio_class = ioprio >> IOPRIO_CLASS_SHIFT;
16562306a36Sopenharmony_ci			ioprio = ioprio & 0xff;
16662306a36Sopenharmony_ci			printf("%s: prio %d\n", to_prio[ioprio_class], ioprio);
16762306a36Sopenharmony_ci		}
16862306a36Sopenharmony_ci	} else {
16962306a36Sopenharmony_ci		if (ioprio_set(IOPRIO_WHO_PROCESS, pid, ioprio | ioprio_class << IOPRIO_CLASS_SHIFT) == -1) {
17062306a36Sopenharmony_ci			perror("ioprio_set");
17162306a36Sopenharmony_ci			return 1;
17262306a36Sopenharmony_ci		}
17362306a36Sopenharmony_ci
17462306a36Sopenharmony_ci		if (argv[optind])
17562306a36Sopenharmony_ci			execvp(argv[optind], &argv[optind]);
17662306a36Sopenharmony_ci	}
17762306a36Sopenharmony_ci
17862306a36Sopenharmony_ci	return 0;
17962306a36Sopenharmony_ci  }
18062306a36Sopenharmony_ci
18162306a36Sopenharmony_ci
18262306a36Sopenharmony_ciMarch 11 2005, Jens Axboe <jens.axboe@oracle.com>
183