1// SPDX-License-Identifier: GPL-2.0-or-later 2/* 3 * Copyright (C) 2012 Linux Test Project, Inc. 4 */ 5 6/* 7 * use migrate_pages() and check that address is on correct node 8 * 1. process A can migrate its non-shared mem with CAP_SYS_NICE 9 * 2. process A can migrate its non-shared mem without CAP_SYS_NICE 10 * 3. process A can migrate shared mem only with CAP_SYS_NICE 11 * 4. process A can migrate non-shared mem in process B with same effective uid 12 * 5. process A can migrate non-shared mem in process B with CAP_SYS_NICE 13 */ 14#include <sys/types.h> 15#include <sys/syscall.h> 16#include <sys/wait.h> 17#include <sys/mman.h> 18#include <sys/prctl.h> 19#include <errno.h> 20#include <stdio.h> 21#include <stdlib.h> 22#include <unistd.h> 23#include <pwd.h> 24 25#include "tst_test.h" 26#include "lapi/syscalls.h" 27#include "numa_helper.h" 28#include "migrate_pages_common.h" 29 30/* 31 * This is an estimated minimum of free mem required to migrate this 32 * process to another node as migrate_pages will fail if there is not 33 * enough free space on node. While running this test on x86_64 34 * it used ~2048 pages (total VM, not just RSS). Considering ia64 as 35 * architecture with largest (non-huge) page size (16k), this limit 36 * is set to 2048*16k == 32M. 37 */ 38#define NODE_MIN_FREEMEM (32*1024*1024) 39 40#ifdef HAVE_NUMA_V2 41 42static const char nobody_uid[] = "nobody"; 43static struct passwd *ltpuser; 44static int *nodes, nodeA, nodeB; 45static int num_nodes; 46 47static void print_mem_stats(pid_t pid, int node) 48{ 49 char s[64]; 50 long long node_size, freep; 51 52 if (pid == 0) 53 pid = getpid(); 54 55 tst_res(TINFO, "mem_stats pid: %d, node: %d", pid, node); 56 57 /* dump pid's VM info */ 58 sprintf(s, "cat /proc/%d/status", pid); 59 system(s); 60 sprintf(s, "cat /proc/%d/numa_maps", pid); 61 system(s); 62 63 /* dump node free mem */ 64 node_size = numa_node_size64(node, &freep); 65 tst_res(TINFO, "Node id: %d, size: %lld, free: %lld", 66 node, node_size, freep); 67} 68 69static int migrate_to_node(pid_t pid, int node) 70{ 71 unsigned long nodemask_size, max_node; 72 unsigned long *old_nodes, *new_nodes; 73 int i; 74 75 tst_res(TINFO, "pid(%d) migrate pid %d to node -> %d", 76 getpid(), pid, node); 77 max_node = LTP_ALIGN(get_max_node(), sizeof(unsigned long)*8); 78 nodemask_size = max_node / 8; 79 old_nodes = SAFE_MALLOC(nodemask_size); 80 new_nodes = SAFE_MALLOC(nodemask_size); 81 82 memset(old_nodes, 0, nodemask_size); 83 memset(new_nodes, 0, nodemask_size); 84 for (i = 0; i < num_nodes; i++) 85 set_bit(old_nodes, nodes[i], 1); 86 set_bit(new_nodes, node, 1); 87 88 TEST(tst_syscall(__NR_migrate_pages, pid, max_node, old_nodes, 89 new_nodes)); 90 if (TST_RET != 0) { 91 if (TST_RET < 0) { 92 tst_res(TFAIL | TTERRNO, "migrate_pages failed " 93 "ret: %ld, ", TST_RET); 94 print_mem_stats(pid, node); 95 } else { 96 tst_res(TINFO, "migrate_pages could not migrate all " 97 "pages, not migrated: %ld", TST_RET); 98 } 99 } 100 free(old_nodes); 101 free(new_nodes); 102 return TST_RET; 103} 104 105static int addr_on_node(void *addr) 106{ 107 int node; 108 int ret; 109 110 ret = tst_syscall(__NR_get_mempolicy, &node, NULL, (unsigned long)0, 111 (unsigned long)addr, MPOL_F_NODE | MPOL_F_ADDR); 112 if (ret == -1) { 113 tst_res(TFAIL | TERRNO, 114 "error getting memory policy for page %p", addr); 115 } 116 return node; 117} 118 119static int check_addr_on_node(void *addr, int exp_node) 120{ 121 int node; 122 123 node = addr_on_node(addr); 124 if (node == exp_node) { 125 tst_res(TPASS, "pid(%d) addr %p is on expected node: %d", 126 getpid(), addr, exp_node); 127 return TPASS; 128 } else { 129 tst_res(TFAIL, "pid(%d) addr %p not on expected node: %d " 130 ", expected %d", getpid(), addr, node, exp_node); 131 print_mem_stats(0, exp_node); 132 return TFAIL; 133 } 134} 135 136static void test_migrate_current_process(int node1, int node2, int cap_sys_nice) 137{ 138 char *private, *shared; 139 int ret; 140 pid_t child; 141 142 /* parent can migrate its non-shared memory */ 143 tst_res(TINFO, "current_process, cap_sys_nice: %d", cap_sys_nice); 144 private = SAFE_MMAP(NULL, getpagesize(), PROT_READ | PROT_WRITE, 145 MAP_ANONYMOUS | MAP_PRIVATE, 0, 0); 146 private[0] = 0; 147 tst_res(TINFO, "private anonymous: %p", private); 148 149 migrate_to_node(0, node2); 150 check_addr_on_node(private, node2); 151 migrate_to_node(0, node1); 152 check_addr_on_node(private, node1); 153 SAFE_MUNMAP(private, getpagesize()); 154 155 /* parent can migrate shared memory with CAP_SYS_NICE */ 156 shared = SAFE_MMAP(NULL, getpagesize(), PROT_READ | PROT_WRITE, 157 MAP_ANONYMOUS | MAP_SHARED, 0, 0); 158 shared[0] = 1; 159 tst_res(TINFO, "shared anonymous: %p", shared); 160 migrate_to_node(0, node2); 161 check_addr_on_node(shared, node2); 162 163 /* shared mem is on node2, try to migrate in child to node1 */ 164 fflush(stdout); 165 child = SAFE_FORK(); 166 if (child == 0) { 167 tst_res(TINFO, "child shared anonymous, cap_sys_nice: %d", 168 cap_sys_nice); 169 private = SAFE_MMAP(NULL, getpagesize(), 170 PROT_READ | PROT_WRITE, 171 MAP_ANONYMOUS | MAP_PRIVATE, 0, 0); 172 private[0] = 1; 173 shared[0] = 1; 174 if (!cap_sys_nice) 175 SAFE_SETEUID(ltpuser->pw_uid); 176 177 migrate_to_node(0, node1); 178 /* child can migrate non-shared memory */ 179 ret = check_addr_on_node(private, node1); 180 181 exit(ret); 182 } 183 184 SAFE_WAITPID(child, NULL, 0); 185 if (cap_sys_nice) 186 /* child can migrate shared memory only 187 * with CAP_SYS_NICE */ 188 check_addr_on_node(shared, node1); 189 else 190 check_addr_on_node(shared, node2); 191 SAFE_MUNMAP(shared, getpagesize()); 192} 193 194static void test_migrate_other_process(int node1, int node2, int cap_sys_nice) 195{ 196 char *private; 197 int ret; 198 pid_t child1, child2; 199 200 tst_res(TINFO, "other_process, cap_sys_nice: %d", cap_sys_nice); 201 202 fflush(stdout); 203 child1 = SAFE_FORK(); 204 if (child1 == 0) { 205 private = SAFE_MMAP(NULL, getpagesize(), 206 PROT_READ | PROT_WRITE, 207 MAP_ANONYMOUS | MAP_PRIVATE, 0, 0); 208 private[0] = 0; 209 210 /* make sure we are on node1 */ 211 migrate_to_node(0, node1); 212 check_addr_on_node(private, node1); 213 214 SAFE_SETUID(ltpuser->pw_uid); 215 216 /* commit_creds() will clear dumpable, restore it */ 217 if (prctl(PR_SET_DUMPABLE, 1)) 218 tst_brk(TBROK | TERRNO, "prctl"); 219 220 /* signal child2 it's OK to migrate child1 and wait */ 221 TST_CHECKPOINT_WAKE(0); 222 TST_CHECKPOINT_WAIT(1); 223 224 /* child2 can migrate child1 process if it's privileged */ 225 /* child2 can migrate child1 process if it has same uid */ 226 ret = check_addr_on_node(private, node2); 227 228 exit(ret); 229 } 230 231 fflush(stdout); 232 child2 = SAFE_FORK(); 233 if (child2 == 0) { 234 if (!cap_sys_nice) 235 SAFE_SETUID(ltpuser->pw_uid); 236 237 /* wait until child1 is ready on node1, then migrate and 238 * signal to check current node */ 239 TST_CHECKPOINT_WAIT(0); 240 migrate_to_node(child1, node2); 241 TST_CHECKPOINT_WAKE(1); 242 243 exit(TPASS); 244 } 245 246 SAFE_WAITPID(child1, NULL, 0); 247 SAFE_WAITPID(child2, NULL, 0); 248} 249 250static void run(void) 251{ 252 test_migrate_current_process(nodeA, nodeB, 1); 253 test_migrate_current_process(nodeA, nodeB, 0); 254 test_migrate_other_process(nodeA, nodeB, 1); 255 test_migrate_other_process(nodeA, nodeB, 0); 256} 257 258static void setup(void) 259{ 260 int ret, i, j; 261 int pagesize = getpagesize(); 262 void *p; 263 264 tst_syscall(__NR_migrate_pages, 0, 0, NULL, NULL); 265 266 if (numa_available() == -1) 267 tst_brk(TCONF, "NUMA not available"); 268 269 ret = get_allowed_nodes_arr(NH_MEMS, &num_nodes, &nodes); 270 if (ret < 0) 271 tst_brk(TBROK | TERRNO, "get_allowed_nodes(): %d", ret); 272 273 if (num_nodes < 2) 274 tst_brk(TCONF, "at least 2 allowed NUMA nodes" 275 " are required"); 276 /* 277 * find 2 nodes, which can hold NODE_MIN_FREEMEM bytes 278 * The reason is that: 279 * 1. migrate_pages() is expected to succeed 280 * 2. this test avoids hitting: 281 * Bug 870326 - migrate_pages() reports success, but pages are 282 * not moved to desired node 283 * https://bugzilla.redhat.com/show_bug.cgi?id=870326 284 */ 285 nodeA = nodeB = -1; 286 for (i = 0; i < num_nodes; i++) { 287 p = numa_alloc_onnode(NODE_MIN_FREEMEM, nodes[i]); 288 if (p == NULL) 289 break; 290 memset(p, 0xff, NODE_MIN_FREEMEM); 291 292 j = 0; 293 while (j < NODE_MIN_FREEMEM) { 294 if (addr_on_node(p + j) != nodes[i]) 295 break; 296 j += pagesize; 297 } 298 numa_free(p, NODE_MIN_FREEMEM); 299 300 if (j >= NODE_MIN_FREEMEM) { 301 if (nodeA == -1) 302 nodeA = nodes[i]; 303 else if (nodeB == -1) 304 nodeB = nodes[i]; 305 else 306 break; 307 } 308 } 309 310 if (nodeA == -1 || nodeB == -1) 311 tst_brk(TCONF, "at least 2 NUMA nodes with " 312 "free mem > %d are needed", NODE_MIN_FREEMEM); 313 tst_res(TINFO, "Using nodes: %d %d", nodeA, nodeB); 314 315 ltpuser = getpwnam(nobody_uid); 316 if (ltpuser == NULL) 317 tst_brk(TBROK | TERRNO, "getpwnam failed"); 318} 319 320static struct tst_test test = { 321 .needs_root = 1, 322 .needs_checkpoints = 1, 323 .forks_child = 1, 324 .test_all = run, 325 .setup = setup, 326 .save_restore = (const struct tst_path_val[]) { 327 {"/proc/sys/kernel/numa_balancing", "0", 328 TST_SR_SKIP_MISSING | TST_SR_TCONF_RO}, 329 {} 330 }, 331}; 332#else 333TST_TEST_TCONF(NUMA_ERROR_MSG); 334#endif 335