===================
Block io priorities
===================


Intro
-----

With the introduction of cfq v3 (aka cfq-ts or time sliced cfq), basic io
priorities are supported for reads on files. This enables users to io nice
processes or process groups, similar to what has been possible with cpu
scheduling for ages. This document mainly details the current possibilities
with cfq; other io schedulers do not support io priorities thus far.

Scheduling classes
------------------

CFQ implements three generic scheduling classes that determine how io is
served for a process.

IOPRIO_CLASS_RT: This is the realtime io class. This scheduling class is given
higher priority than any other in the system; processes from this class are
given first access to the disk every time. Thus it needs to be used with some
care: one io RT process can starve the entire system. Within the RT class,
there are 8 levels of class data that determine exactly how much time this
process needs the disk for on each service. In the future this might change
to be more directly mappable to performance, by passing in a wanted data
rate instead.

IOPRIO_CLASS_BE: This is the best-effort scheduling class, which is the default
for any process that hasn't set a specific io priority. The class data
determines how much io bandwidth the process will get; it's directly mappable
to the cpu nice levels, just more coarsely implemented. 0 is the highest
BE prio level, 7 is the lowest. The mapping between cpu nice level and io
nice level is determined as: io_nice = (cpu_nice + 20) / 5.

IOPRIO_CLASS_IDLE: This is the idle scheduling class; processes running at this
level only get io time when no one else needs the disk. The idle class has no
class data, since it doesn't really apply here.

Tools
-----

See below for a sample ionice tool. Usage::

	# ionice -c<class> -n<level> -p<pid>

If pid isn't given, the current process is assumed. IO priority settings
are inherited on fork, so you can use ionice to start the process at a given
level::

	# ionice -c2 -n0 /bin/ls

will run ls at the best-effort scheduling class at the highest priority.
For a running process, you can give the pid instead::

	# ionice -c1 -n2 -p100

will change pid 100 to run at the realtime scheduling class, at priority 2.
/*
 * ionice.c tool
 *
 * Sample tool that reads or sets the io priority of a process through the
 * ioprio_get/ioprio_set system calls, or starts a command at a given io
 * priority.
 *
 *	ionice -c<class> -n<level> -p<pid>
 *	ionice -c<class> -n<level> <command> [args...]
 *
 * Without -c or -n, the current io priority of the pid is printed instead.
 */

/* Ask the C library to declare syscall() even in a strict ISO C mode. */
#ifndef _GNU_SOURCE
#define _GNU_SOURCE
#endif

#include <stdio.h>
#include <stdlib.h>
#include <errno.h>
#include <limits.h>
#include <getopt.h>
#include <unistd.h>
#include <sys/ptrace.h>
#include <sys/syscall.h>
#include <asm/unistd.h>

/*
 * Prefer the syscall numbers supplied by the system headers. The hard-coded
 * per-architecture values are only a fallback for headers that lack them.
 */
#ifndef __NR_ioprio_set
#if defined(__i386__)
#define __NR_ioprio_set		289
#define __NR_ioprio_get		290
#elif defined(__ppc__) || defined(__powerpc__)
#define __NR_ioprio_set		273
#define __NR_ioprio_get		274
#elif defined(__x86_64__)
#define __NR_ioprio_set		251
#define __NR_ioprio_get		252
#elif defined(__ia64__)
#define __NR_ioprio_set		1274
#define __NR_ioprio_get		1275
#else
#error "Unsupported arch"
#endif
#endif

#define ARRAY_SIZE(a)		(sizeof(a) / sizeof((a)[0]))

/* Thin wrapper around the ioprio_set syscall; returns -1 and sets errno on failure. */
static inline int ioprio_set(int which, int who, int ioprio)
{
	return (int)syscall(__NR_ioprio_set, which, who, ioprio);
}

/* Thin wrapper around the ioprio_get syscall; returns -1 and sets errno on failure. */
static inline int ioprio_get(int which, int who)
{
	return (int)syscall(__NR_ioprio_get, which, who);
}

enum {
	IOPRIO_CLASS_NONE,
	IOPRIO_CLASS_RT,
	IOPRIO_CLASS_BE,
	IOPRIO_CLASS_IDLE,
};

enum {
	IOPRIO_WHO_PROCESS = 1,
	IOPRIO_WHO_PGRP,
	IOPRIO_WHO_USER,
};

/* An io priority value is (class << IOPRIO_CLASS_SHIFT) | class data. */
#define IOPRIO_CLASS_SHIFT	13
#define IOPRIO_PRIO_MASK	((1 << IOPRIO_CLASS_SHIFT) - 1)
/* Number of priority levels within a class (0 is highest, 7 is lowest). */
#define IOPRIO_NR_LEVELS	8

/* Human-readable class names, indexed by IOPRIO_CLASS_*. */
static const char *const to_prio[] = { "none", "realtime", "best-effort", "idle", };

/*
 * Parse a complete base-10 integer in the range [min, max] from str.
 *
 * Returns 0 and stores the value in *out on success. Returns -1 and leaves
 * *out untouched if str is empty, has trailing garbage or is out of range.
 */
static int parse_int(const char *str, int min, int max, int *out)
{
	char *end;
	long val;

	errno = 0;
	val = strtol(str, &end, 10);
	if (end == str || *end != '\0' || errno == ERANGE)
		return -1;
	if (val < min || val > max)
		return -1;

	*out = (int)val;
	return 0;
}

/* Print a short usage summary to stderr. */
static void usage(const char *prog)
{
	fprintf(stderr,
		"usage: %s [-c class] [-n level] [-p pid] [command [args...]]\n",
		prog);
}

/*
 * Entry point.
 *
 * With -c and/or -n, sets the io priority of the given pid (0, the default,
 * is passed through to the syscall as-is) and then, if a command follows the
 * options, executes it. Without -c and -n, prints the io priority of the pid
 * given by -p or as the first non-option argument.
 *
 * Returns 0 on success and 1 on any error.
 */
int main(int argc, char *argv[])
{
	int ioprio = 4, set = 0, ioprio_class = IOPRIO_CLASS_BE;
	int c, pid = 0;

	/* The leading '+' stops option parsing at the first non-option argument. */
	while ((c = getopt(argc, argv, "+n:c:p:")) != -1) {
		switch (c) {
		case 'n':
			if (parse_int(optarg, 0, IOPRIO_NR_LEVELS - 1, &ioprio)) {
				fprintf(stderr, "bad prio level %s\n", optarg);
				return 1;
			}
			set = 1;
			break;
		case 'c':
			/* The class range itself is checked by the switch below. */
			if (parse_int(optarg, INT_MIN, INT_MAX, &ioprio_class)) {
				fprintf(stderr, "bad prio class %s\n", optarg);
				return 1;
			}
			set = 1;
			break;
		case 'p':
			if (parse_int(optarg, 0, INT_MAX, &pid)) {
				fprintf(stderr, "bad pid %s\n", optarg);
				return 1;
			}
			break;
		default:
			/* getopt() has already reported the offending option. */
			usage(argv[0]);
			return 1;
		}
	}

	switch (ioprio_class) {
	case IOPRIO_CLASS_NONE:
		ioprio_class = IOPRIO_CLASS_BE;
		break;
	case IOPRIO_CLASS_RT:
	case IOPRIO_CLASS_BE:
		break;
	case IOPRIO_CLASS_IDLE:
		/* The idle class has no levels; always use the lowest one. */
		ioprio = 7;
		break;
	default:
		fprintf(stderr, "bad prio class %d\n", ioprio_class);
		return 1;
	}

	if (!set) {
		int saved_errno;

		if (!pid && argv[optind] &&
		    parse_int(argv[optind], 0, INT_MAX, &pid)) {
			fprintf(stderr, "bad pid %s\n", argv[optind]);
			return 1;
		}

		ioprio = ioprio_get(IOPRIO_WHO_PROCESS, pid);
		/* printf() may clobber errno, so save it for perror(). */
		saved_errno = errno;

		printf("pid=%d, %d\n", pid, ioprio);

		if (ioprio == -1) {
			errno = saved_errno;
			perror("ioprio_get");
			return 1;
		}

		ioprio_class = ioprio >> IOPRIO_CLASS_SHIFT;
		ioprio &= IOPRIO_PRIO_MASK;

		/* Never index to_prio[] with a class this tool does not know. */
		if (ioprio_class < 0 || ioprio_class >= (int)ARRAY_SIZE(to_prio))
			printf("unknown class %d: prio %d\n", ioprio_class, ioprio);
		else
			printf("%s: prio %d\n", to_prio[ioprio_class], ioprio);
	} else {
		if (ioprio_set(IOPRIO_WHO_PROCESS, pid,
			       ioprio | (ioprio_class << IOPRIO_CLASS_SHIFT)) == -1) {
			perror("ioprio_set");
			return 1;
		}

		if (argv[optind]) {
			execvp(argv[optind], &argv[optind]);
			/* execvp() only returns on failure. */
			perror("execvp");
			return 1;
		}
	}

	return 0;
}

/* March 11 2005, Jens Axboe <jens.axboe@oracle.com> */