===================
Block io priorities
===================


Intro
-----

With the introduction of cfq v3 (aka cfq-ts or time sliced cfq), basic io
priorities are supported for reads on files. This enables users to io nice
processes or process groups, similar to what has been possible with cpu
scheduling for ages. This document mainly details the current possibilities
with cfq; other io schedulers do not support io priorities thus far.

Scheduling classes
------------------

CFQ implements three generic scheduling classes that determine how io is
served for a process.

IOPRIO_CLASS_RT: This is the realtime io class. This scheduling class is given
higher priority than any other in the system; processes from this class are
given first access to the disk every time. Thus it needs to be used with some
care: one io RT process can starve the entire system. Within the RT class,
there are 8 levels of class data that determine exactly how much time this
process needs the disk for on each service. In the future this might change
to be more directly mappable to performance, by passing in a wanted data
rate instead.

IOPRIO_CLASS_BE: This is the best-effort scheduling class, which is the default
for any process that hasn't set a specific io priority. The class data
determines how much io bandwidth the process will get; it's directly mappable
to the cpu nice levels, just more coarsely implemented. 0 is the highest
BE prio level, 7 is the lowest. The mapping between cpu nice level and io
nice level is determined as: io_nice = (cpu_nice + 20) / 5.

IOPRIO_CLASS_IDLE: This is the idle scheduling class; processes running at this
level only get io time when no one else needs the disk. The idle class has no
class data, since it doesn't really apply here.

Tools
-----

See below for a sample ionice tool. Usage::

    # ionice -c<class> -n<level> -p<pid>

If pid isn't given, the current process is assumed. IO priority settings
are inherited on fork, so you can use ionice to start the process at a given
level::

    # ionice -c2 -n0 /bin/ls

will run ls at the best-effort scheduling class at the highest priority.
For a running process, you can give the pid instead::

    # ionice -c1 -n2 -p100

will change pid 100 to run at the realtime scheduling class, at priority 2.

// ionice.c tool::

    /*
     * ionice - get or set the io scheduling class and priority of a process.
     *
     *   ionice [-c<class>] [-n<level>] [-p<pid>] [command [args...]]
     *
     * Without -c/-n the current io priority of <pid> (or of the pid given as
     * the first non-option argument) is printed.  With -c and/or -n the
     * priority is set; if a command follows the options it is then exec'ed,
     * inheriting the new priority.  A pid of 0 means the calling process.
     *
     * Exit status is 0 on success and 1 on any error.
     */
    #define _GNU_SOURCE     /* glibc only declares syscall() with this set */
    #include <stdio.h>
    #include <stdlib.h>
    #include <errno.h>
    #include <limits.h>
    #include <getopt.h>
    #include <unistd.h>
    #include <sys/ptrace.h>
    #include <asm/unistd.h>

    /*
     * <asm/unistd.h> normally supplies the syscall numbers for the build
     * architecture.  The table below is only a fallback for headers that
     * lack them, so architectures it does not list still build.
     */
    #if !defined(__NR_ioprio_set) || !defined(__NR_ioprio_get)
    #if defined(__i386__)
    #define __NR_ioprio_set         289
    #define __NR_ioprio_get         290
    #elif defined(__ppc__)
    #define __NR_ioprio_set         273
    #define __NR_ioprio_get         274
    #elif defined(__x86_64__)
    #define __NR_ioprio_set         251
    #define __NR_ioprio_get         252
    #elif defined(__ia64__)
    #define __NR_ioprio_set         1274
    #define __NR_ioprio_get         1275
    #else
    #error "Unsupported arch"
    #endif
    #endif

    /* Thin wrapper around the ioprio_set syscall; -1 and errno on failure. */
    static inline int ioprio_set(int which, int who, int ioprio)
    {
        return (int)syscall(__NR_ioprio_set, which, who, ioprio);
    }

    /* Thin wrapper around the ioprio_get syscall; -1 and errno on failure. */
    static inline int ioprio_get(int which, int who)
    {
        return (int)syscall(__NR_ioprio_get, which, who);
    }

    enum {
        IOPRIO_CLASS_NONE,
        IOPRIO_CLASS_RT,
        IOPRIO_CLASS_BE,
        IOPRIO_CLASS_IDLE,
    };

    enum {
        IOPRIO_WHO_PROCESS = 1,
        IOPRIO_WHO_PGRP,
        IOPRIO_WHO_USER,
    };

    /* An io priority value is (class << IOPRIO_CLASS_SHIFT) | class data. */
    #define IOPRIO_CLASS_SHIFT      13
    #define IOPRIO_PRIO_MASK        ((1 << IOPRIO_CLASS_SHIFT) - 1)

    /* The RT and BE classes each have levels 0 (highest) .. 7 (lowest). */
    #define IOPRIO_NR_LEVELS        8

    /* Printable class names, indexed by IOPRIO_CLASS_*. */
    static const char *const to_prio[] = {
        "none", "realtime", "best-effort", "idle",
    };

    #define NR_CLASS_NAMES  ((int)(sizeof(to_prio) / sizeof(to_prio[0])))

    /*
     * Parse a complete decimal integer from @text into @out.
     *
     * Returns 0 on success.  On empty input, trailing garbage or a value
     * that does not fit in an int, prints a message naming @what on stderr
     * and returns -1 without touching @out.
     */
    static int parse_int(const char *text, const char *what, int *out)
    {
        char *end;
        long value;

        errno = 0;
        value = strtol(text, &end, 10);
        if (end == text || *end != '\0' || errno == ERANGE ||
            value < INT_MIN || value > INT_MAX) {
            fprintf(stderr, "bad %s '%s'\n", what, text);
            return -1;
        }

        *out = (int)value;
        return 0;
    }

    static void usage(const char *prog)
    {
        fprintf(stderr,
                "usage: %s [-c<class>] [-n<level>] [-p<pid>] [command [args...]]\n",
                prog);
    }

    int main(int argc, char *argv[])
    {
        int ioprio = 4, set = 0, ioprio_class = IOPRIO_CLASS_BE;
        int c, pid = 0;

        /* Leading '+': stop at the first non-option so a command can follow. */
        while ((c = getopt(argc, argv, "+n:c:p:")) != -1) {
            switch (c) {
            case 'n':
                if (parse_int(optarg, "prio level", &ioprio))
                    return 1;
                set = 1;
                break;
            case 'c':
                if (parse_int(optarg, "prio class", &ioprio_class))
                    return 1;
                set = 1;
                break;
            case 'p':
                if (parse_int(optarg, "pid", &pid))
                    return 1;
                break;
            default:
                usage(argv[0]);
                return 1;
            }
        }

        switch (ioprio_class) {
        case IOPRIO_CLASS_NONE:
            ioprio_class = IOPRIO_CLASS_BE;
            break;
        case IOPRIO_CLASS_RT:
        case IOPRIO_CLASS_BE:
            /* An out-of-range level would spill into the class bits. */
            if (ioprio < 0 || ioprio >= IOPRIO_NR_LEVELS) {
                fprintf(stderr, "bad prio level %d\n", ioprio);
                return 1;
            }
            break;
        case IOPRIO_CLASS_IDLE:
            /* The idle class carries no class data; any -n is ignored. */
            ioprio = 7;
            break;
        default:
            fprintf(stderr, "bad prio class %d\n", ioprio_class);
            return 1;
        }

        if (!set) {
            if (!pid && argv[optind] &&
                parse_int(argv[optind], "pid", &pid))
                return 1;

            ioprio = ioprio_get(IOPRIO_WHO_PROCESS, pid);
            /* Report the failure before any other call can clobber errno. */
            if (ioprio == -1) {
                perror("ioprio_get");
                return 1;
            }

            printf("pid=%d, %d\n", pid, ioprio);

            ioprio_class = ioprio >> IOPRIO_CLASS_SHIFT;
            ioprio &= IOPRIO_PRIO_MASK;
            if (ioprio_class < 0 || ioprio_class >= NR_CLASS_NAMES)
                printf("unknown class %d: prio %d\n", ioprio_class, ioprio);
            else
                printf("%s: prio %d\n", to_prio[ioprio_class], ioprio);

            return 0;
        }

        if (ioprio_set(IOPRIO_WHO_PROCESS, pid,
                       (ioprio_class << IOPRIO_CLASS_SHIFT) | ioprio) == -1) {
            perror("ioprio_set");
            return 1;
        }

        if (argv[optind]) {
            execvp(argv[optind], &argv[optind]);
            /* execvp() only returns when it has failed. */
            perror("execvp");
            return 1;
        }

        return 0;
    }


// March 11 2005, Jens Axboe <jens.axboe@oracle.com>