1f08c3bdfSopenharmony_ci// SPDX-License-Identifier: GPL-2.0-or-later 2f08c3bdfSopenharmony_ci/* 3f08c3bdfSopenharmony_ci * Copyright (C) 2012 Linux Test Project, Inc. 4f08c3bdfSopenharmony_ci */ 5f08c3bdfSopenharmony_ci 6f08c3bdfSopenharmony_ci/* 7f08c3bdfSopenharmony_ci * use migrate_pages() and check that address is on correct node 8f08c3bdfSopenharmony_ci * 1. process A can migrate its non-shared mem with CAP_SYS_NICE 9f08c3bdfSopenharmony_ci * 2. process A can migrate its non-shared mem without CAP_SYS_NICE 10f08c3bdfSopenharmony_ci * 3. process A can migrate shared mem only with CAP_SYS_NICE 11f08c3bdfSopenharmony_ci * 4. process A can migrate non-shared mem in process B with same effective uid 12f08c3bdfSopenharmony_ci * 5. process A can migrate non-shared mem in process B with CAP_SYS_NICE 13f08c3bdfSopenharmony_ci */ 14f08c3bdfSopenharmony_ci#include <sys/types.h> 15f08c3bdfSopenharmony_ci#include <sys/syscall.h> 16f08c3bdfSopenharmony_ci#include <sys/wait.h> 17f08c3bdfSopenharmony_ci#include <sys/mman.h> 18f08c3bdfSopenharmony_ci#include <sys/prctl.h> 19f08c3bdfSopenharmony_ci#include <errno.h> 20f08c3bdfSopenharmony_ci#include <stdio.h> 21f08c3bdfSopenharmony_ci#include <stdlib.h> 22f08c3bdfSopenharmony_ci#include <unistd.h> 23f08c3bdfSopenharmony_ci#include <pwd.h> 24f08c3bdfSopenharmony_ci 25f08c3bdfSopenharmony_ci#include "tst_test.h" 26f08c3bdfSopenharmony_ci#include "lapi/syscalls.h" 27f08c3bdfSopenharmony_ci#include "numa_helper.h" 28f08c3bdfSopenharmony_ci#include "migrate_pages_common.h" 29f08c3bdfSopenharmony_ci 30f08c3bdfSopenharmony_ci/* 31f08c3bdfSopenharmony_ci * This is an estimated minimum of free mem required to migrate this 32f08c3bdfSopenharmony_ci * process to another node as migrate_pages will fail if there is not 33f08c3bdfSopenharmony_ci * enough free space on node. While running this test on x86_64 34f08c3bdfSopenharmony_ci * it used ~2048 pages (total VM, not just RSS). Considering ia64 as 35f08c3bdfSopenharmony_ci * architecture with largest (non-huge) page size (16k), this limit 36f08c3bdfSopenharmony_ci * is set to 2048*16k == 32M. 37f08c3bdfSopenharmony_ci */ 38f08c3bdfSopenharmony_ci#define NODE_MIN_FREEMEM (32*1024*1024) 39f08c3bdfSopenharmony_ci 40f08c3bdfSopenharmony_ci#ifdef HAVE_NUMA_V2 41f08c3bdfSopenharmony_ci 42f08c3bdfSopenharmony_cistatic const char nobody_uid[] = "nobody"; 43f08c3bdfSopenharmony_cistatic struct passwd *ltpuser; 44f08c3bdfSopenharmony_cistatic int *nodes, nodeA, nodeB; 45f08c3bdfSopenharmony_cistatic int num_nodes; 46f08c3bdfSopenharmony_ci 47f08c3bdfSopenharmony_cistatic void print_mem_stats(pid_t pid, int node) 48f08c3bdfSopenharmony_ci{ 49f08c3bdfSopenharmony_ci char s[64]; 50f08c3bdfSopenharmony_ci long long node_size, freep; 51f08c3bdfSopenharmony_ci 52f08c3bdfSopenharmony_ci if (pid == 0) 53f08c3bdfSopenharmony_ci pid = getpid(); 54f08c3bdfSopenharmony_ci 55f08c3bdfSopenharmony_ci tst_res(TINFO, "mem_stats pid: %d, node: %d", pid, node); 56f08c3bdfSopenharmony_ci 57f08c3bdfSopenharmony_ci /* dump pid's VM info */ 58f08c3bdfSopenharmony_ci sprintf(s, "cat /proc/%d/status", pid); 59f08c3bdfSopenharmony_ci system(s); 60f08c3bdfSopenharmony_ci sprintf(s, "cat /proc/%d/numa_maps", pid); 61f08c3bdfSopenharmony_ci system(s); 62f08c3bdfSopenharmony_ci 63f08c3bdfSopenharmony_ci /* dump node free mem */ 64f08c3bdfSopenharmony_ci node_size = numa_node_size64(node, &freep); 65f08c3bdfSopenharmony_ci tst_res(TINFO, "Node id: %d, size: %lld, free: %lld", 66f08c3bdfSopenharmony_ci node, node_size, freep); 67f08c3bdfSopenharmony_ci} 68f08c3bdfSopenharmony_ci 69f08c3bdfSopenharmony_cistatic int migrate_to_node(pid_t pid, int node) 70f08c3bdfSopenharmony_ci{ 71f08c3bdfSopenharmony_ci unsigned long nodemask_size, max_node; 72f08c3bdfSopenharmony_ci unsigned long *old_nodes, *new_nodes; 73f08c3bdfSopenharmony_ci int i; 74f08c3bdfSopenharmony_ci 75f08c3bdfSopenharmony_ci tst_res(TINFO, "pid(%d) migrate pid %d to node -> %d", 76f08c3bdfSopenharmony_ci getpid(), pid, node); 77f08c3bdfSopenharmony_ci max_node = LTP_ALIGN(get_max_node(), sizeof(unsigned long)*8); 78f08c3bdfSopenharmony_ci nodemask_size = max_node / 8; 79f08c3bdfSopenharmony_ci old_nodes = SAFE_MALLOC(nodemask_size); 80f08c3bdfSopenharmony_ci new_nodes = SAFE_MALLOC(nodemask_size); 81f08c3bdfSopenharmony_ci 82f08c3bdfSopenharmony_ci memset(old_nodes, 0, nodemask_size); 83f08c3bdfSopenharmony_ci memset(new_nodes, 0, nodemask_size); 84f08c3bdfSopenharmony_ci for (i = 0; i < num_nodes; i++) 85f08c3bdfSopenharmony_ci set_bit(old_nodes, nodes[i], 1); 86f08c3bdfSopenharmony_ci set_bit(new_nodes, node, 1); 87f08c3bdfSopenharmony_ci 88f08c3bdfSopenharmony_ci TEST(tst_syscall(__NR_migrate_pages, pid, max_node, old_nodes, 89f08c3bdfSopenharmony_ci new_nodes)); 90f08c3bdfSopenharmony_ci if (TST_RET != 0) { 91f08c3bdfSopenharmony_ci if (TST_RET < 0) { 92f08c3bdfSopenharmony_ci tst_res(TFAIL | TTERRNO, "migrate_pages failed " 93f08c3bdfSopenharmony_ci "ret: %ld, ", TST_RET); 94f08c3bdfSopenharmony_ci print_mem_stats(pid, node); 95f08c3bdfSopenharmony_ci } else { 96f08c3bdfSopenharmony_ci tst_res(TINFO, "migrate_pages could not migrate all " 97f08c3bdfSopenharmony_ci "pages, not migrated: %ld", TST_RET); 98f08c3bdfSopenharmony_ci } 99f08c3bdfSopenharmony_ci } 100f08c3bdfSopenharmony_ci free(old_nodes); 101f08c3bdfSopenharmony_ci free(new_nodes); 102f08c3bdfSopenharmony_ci return TST_RET; 103f08c3bdfSopenharmony_ci} 104f08c3bdfSopenharmony_ci 105f08c3bdfSopenharmony_cistatic int addr_on_node(void *addr) 106f08c3bdfSopenharmony_ci{ 107f08c3bdfSopenharmony_ci int node; 108f08c3bdfSopenharmony_ci int ret; 109f08c3bdfSopenharmony_ci 110f08c3bdfSopenharmony_ci ret = tst_syscall(__NR_get_mempolicy, &node, NULL, (unsigned long)0, 111f08c3bdfSopenharmony_ci (unsigned long)addr, MPOL_F_NODE | MPOL_F_ADDR); 112f08c3bdfSopenharmony_ci if (ret == -1) { 113f08c3bdfSopenharmony_ci tst_res(TFAIL | TERRNO, 114f08c3bdfSopenharmony_ci "error getting memory policy for page %p", addr); 115f08c3bdfSopenharmony_ci } 116f08c3bdfSopenharmony_ci return node; 117f08c3bdfSopenharmony_ci} 118f08c3bdfSopenharmony_ci 119f08c3bdfSopenharmony_cistatic int check_addr_on_node(void *addr, int exp_node) 120f08c3bdfSopenharmony_ci{ 121f08c3bdfSopenharmony_ci int node; 122f08c3bdfSopenharmony_ci 123f08c3bdfSopenharmony_ci node = addr_on_node(addr); 124f08c3bdfSopenharmony_ci if (node == exp_node) { 125f08c3bdfSopenharmony_ci tst_res(TPASS, "pid(%d) addr %p is on expected node: %d", 126f08c3bdfSopenharmony_ci getpid(), addr, exp_node); 127f08c3bdfSopenharmony_ci return TPASS; 128f08c3bdfSopenharmony_ci } else { 129f08c3bdfSopenharmony_ci tst_res(TFAIL, "pid(%d) addr %p not on expected node: %d " 130f08c3bdfSopenharmony_ci ", expected %d", getpid(), addr, node, exp_node); 131f08c3bdfSopenharmony_ci print_mem_stats(0, exp_node); 132f08c3bdfSopenharmony_ci return TFAIL; 133f08c3bdfSopenharmony_ci } 134f08c3bdfSopenharmony_ci} 135f08c3bdfSopenharmony_ci 136f08c3bdfSopenharmony_cistatic void test_migrate_current_process(int node1, int node2, int cap_sys_nice) 137f08c3bdfSopenharmony_ci{ 138f08c3bdfSopenharmony_ci char *private, *shared; 139f08c3bdfSopenharmony_ci int ret; 140f08c3bdfSopenharmony_ci pid_t child; 141f08c3bdfSopenharmony_ci 142f08c3bdfSopenharmony_ci /* parent can migrate its non-shared memory */ 143f08c3bdfSopenharmony_ci tst_res(TINFO, "current_process, cap_sys_nice: %d", cap_sys_nice); 144f08c3bdfSopenharmony_ci private = SAFE_MMAP(NULL, getpagesize(), PROT_READ | PROT_WRITE, 145f08c3bdfSopenharmony_ci MAP_ANONYMOUS | MAP_PRIVATE, 0, 0); 146f08c3bdfSopenharmony_ci private[0] = 0; 147f08c3bdfSopenharmony_ci tst_res(TINFO, "private anonymous: %p", private); 148f08c3bdfSopenharmony_ci 149f08c3bdfSopenharmony_ci migrate_to_node(0, node2); 150f08c3bdfSopenharmony_ci check_addr_on_node(private, node2); 151f08c3bdfSopenharmony_ci migrate_to_node(0, node1); 152f08c3bdfSopenharmony_ci check_addr_on_node(private, node1); 153f08c3bdfSopenharmony_ci SAFE_MUNMAP(private, getpagesize()); 154f08c3bdfSopenharmony_ci 155f08c3bdfSopenharmony_ci /* parent can migrate shared memory with CAP_SYS_NICE */ 156f08c3bdfSopenharmony_ci shared = SAFE_MMAP(NULL, getpagesize(), PROT_READ | PROT_WRITE, 157f08c3bdfSopenharmony_ci MAP_ANONYMOUS | MAP_SHARED, 0, 0); 158f08c3bdfSopenharmony_ci shared[0] = 1; 159f08c3bdfSopenharmony_ci tst_res(TINFO, "shared anonymous: %p", shared); 160f08c3bdfSopenharmony_ci migrate_to_node(0, node2); 161f08c3bdfSopenharmony_ci check_addr_on_node(shared, node2); 162f08c3bdfSopenharmony_ci 163f08c3bdfSopenharmony_ci /* shared mem is on node2, try to migrate in child to node1 */ 164f08c3bdfSopenharmony_ci fflush(stdout); 165f08c3bdfSopenharmony_ci child = SAFE_FORK(); 166f08c3bdfSopenharmony_ci if (child == 0) { 167f08c3bdfSopenharmony_ci tst_res(TINFO, "child shared anonymous, cap_sys_nice: %d", 168f08c3bdfSopenharmony_ci cap_sys_nice); 169f08c3bdfSopenharmony_ci private = SAFE_MMAP(NULL, getpagesize(), 170f08c3bdfSopenharmony_ci PROT_READ | PROT_WRITE, 171f08c3bdfSopenharmony_ci MAP_ANONYMOUS | MAP_PRIVATE, 0, 0); 172f08c3bdfSopenharmony_ci private[0] = 1; 173f08c3bdfSopenharmony_ci shared[0] = 1; 174f08c3bdfSopenharmony_ci if (!cap_sys_nice) 175f08c3bdfSopenharmony_ci SAFE_SETEUID(ltpuser->pw_uid); 176f08c3bdfSopenharmony_ci 177f08c3bdfSopenharmony_ci migrate_to_node(0, node1); 178f08c3bdfSopenharmony_ci /* child can migrate non-shared memory */ 179f08c3bdfSopenharmony_ci ret = check_addr_on_node(private, node1); 180f08c3bdfSopenharmony_ci 181f08c3bdfSopenharmony_ci exit(ret); 182f08c3bdfSopenharmony_ci } 183f08c3bdfSopenharmony_ci 184f08c3bdfSopenharmony_ci SAFE_WAITPID(child, NULL, 0); 185f08c3bdfSopenharmony_ci if (cap_sys_nice) 186f08c3bdfSopenharmony_ci /* child can migrate shared memory only 187f08c3bdfSopenharmony_ci * with CAP_SYS_NICE */ 188f08c3bdfSopenharmony_ci check_addr_on_node(shared, node1); 189f08c3bdfSopenharmony_ci else 190f08c3bdfSopenharmony_ci check_addr_on_node(shared, node2); 191f08c3bdfSopenharmony_ci SAFE_MUNMAP(shared, getpagesize()); 192f08c3bdfSopenharmony_ci} 193f08c3bdfSopenharmony_ci 194f08c3bdfSopenharmony_cistatic void test_migrate_other_process(int node1, int node2, int cap_sys_nice) 195f08c3bdfSopenharmony_ci{ 196f08c3bdfSopenharmony_ci char *private; 197f08c3bdfSopenharmony_ci int ret; 198f08c3bdfSopenharmony_ci pid_t child1, child2; 199f08c3bdfSopenharmony_ci 200f08c3bdfSopenharmony_ci tst_res(TINFO, "other_process, cap_sys_nice: %d", cap_sys_nice); 201f08c3bdfSopenharmony_ci 202f08c3bdfSopenharmony_ci fflush(stdout); 203f08c3bdfSopenharmony_ci child1 = SAFE_FORK(); 204f08c3bdfSopenharmony_ci if (child1 == 0) { 205f08c3bdfSopenharmony_ci private = SAFE_MMAP(NULL, getpagesize(), 206f08c3bdfSopenharmony_ci PROT_READ | PROT_WRITE, 207f08c3bdfSopenharmony_ci MAP_ANONYMOUS | MAP_PRIVATE, 0, 0); 208f08c3bdfSopenharmony_ci private[0] = 0; 209f08c3bdfSopenharmony_ci 210f08c3bdfSopenharmony_ci /* make sure we are on node1 */ 211f08c3bdfSopenharmony_ci migrate_to_node(0, node1); 212f08c3bdfSopenharmony_ci check_addr_on_node(private, node1); 213f08c3bdfSopenharmony_ci 214f08c3bdfSopenharmony_ci SAFE_SETUID(ltpuser->pw_uid); 215f08c3bdfSopenharmony_ci 216f08c3bdfSopenharmony_ci /* commit_creds() will clear dumpable, restore it */ 217f08c3bdfSopenharmony_ci if (prctl(PR_SET_DUMPABLE, 1)) 218f08c3bdfSopenharmony_ci tst_brk(TBROK | TERRNO, "prctl"); 219f08c3bdfSopenharmony_ci 220f08c3bdfSopenharmony_ci /* signal child2 it's OK to migrate child1 and wait */ 221f08c3bdfSopenharmony_ci TST_CHECKPOINT_WAKE(0); 222f08c3bdfSopenharmony_ci TST_CHECKPOINT_WAIT(1); 223f08c3bdfSopenharmony_ci 224f08c3bdfSopenharmony_ci /* child2 can migrate child1 process if it's privileged */ 225f08c3bdfSopenharmony_ci /* child2 can migrate child1 process if it has same uid */ 226f08c3bdfSopenharmony_ci ret = check_addr_on_node(private, node2); 227f08c3bdfSopenharmony_ci 228f08c3bdfSopenharmony_ci exit(ret); 229f08c3bdfSopenharmony_ci } 230f08c3bdfSopenharmony_ci 231f08c3bdfSopenharmony_ci fflush(stdout); 232f08c3bdfSopenharmony_ci child2 = SAFE_FORK(); 233f08c3bdfSopenharmony_ci if (child2 == 0) { 234f08c3bdfSopenharmony_ci if (!cap_sys_nice) 235f08c3bdfSopenharmony_ci SAFE_SETUID(ltpuser->pw_uid); 236f08c3bdfSopenharmony_ci 237f08c3bdfSopenharmony_ci /* wait until child1 is ready on node1, then migrate and 238f08c3bdfSopenharmony_ci * signal to check current node */ 239f08c3bdfSopenharmony_ci TST_CHECKPOINT_WAIT(0); 240f08c3bdfSopenharmony_ci migrate_to_node(child1, node2); 241f08c3bdfSopenharmony_ci TST_CHECKPOINT_WAKE(1); 242f08c3bdfSopenharmony_ci 243f08c3bdfSopenharmony_ci exit(TPASS); 244f08c3bdfSopenharmony_ci } 245f08c3bdfSopenharmony_ci 246f08c3bdfSopenharmony_ci SAFE_WAITPID(child1, NULL, 0); 247f08c3bdfSopenharmony_ci SAFE_WAITPID(child2, NULL, 0); 248f08c3bdfSopenharmony_ci} 249f08c3bdfSopenharmony_ci 250f08c3bdfSopenharmony_cistatic void run(void) 251f08c3bdfSopenharmony_ci{ 252f08c3bdfSopenharmony_ci test_migrate_current_process(nodeA, nodeB, 1); 253f08c3bdfSopenharmony_ci test_migrate_current_process(nodeA, nodeB, 0); 254f08c3bdfSopenharmony_ci test_migrate_other_process(nodeA, nodeB, 1); 255f08c3bdfSopenharmony_ci test_migrate_other_process(nodeA, nodeB, 0); 256f08c3bdfSopenharmony_ci} 257f08c3bdfSopenharmony_ci 258f08c3bdfSopenharmony_cistatic void setup(void) 259f08c3bdfSopenharmony_ci{ 260f08c3bdfSopenharmony_ci int ret, i, j; 261f08c3bdfSopenharmony_ci int pagesize = getpagesize(); 262f08c3bdfSopenharmony_ci void *p; 263f08c3bdfSopenharmony_ci 264f08c3bdfSopenharmony_ci tst_syscall(__NR_migrate_pages, 0, 0, NULL, NULL); 265f08c3bdfSopenharmony_ci 266f08c3bdfSopenharmony_ci if (numa_available() == -1) 267f08c3bdfSopenharmony_ci tst_brk(TCONF, "NUMA not available"); 268f08c3bdfSopenharmony_ci 269f08c3bdfSopenharmony_ci ret = get_allowed_nodes_arr(NH_MEMS, &num_nodes, &nodes); 270f08c3bdfSopenharmony_ci if (ret < 0) 271f08c3bdfSopenharmony_ci tst_brk(TBROK | TERRNO, "get_allowed_nodes(): %d", ret); 272f08c3bdfSopenharmony_ci 273f08c3bdfSopenharmony_ci if (num_nodes < 2) 274f08c3bdfSopenharmony_ci tst_brk(TCONF, "at least 2 allowed NUMA nodes" 275f08c3bdfSopenharmony_ci " are required"); 276f08c3bdfSopenharmony_ci /* 277f08c3bdfSopenharmony_ci * find 2 nodes, which can hold NODE_MIN_FREEMEM bytes 278f08c3bdfSopenharmony_ci * The reason is that: 279f08c3bdfSopenharmony_ci * 1. migrate_pages() is expected to succeed 280f08c3bdfSopenharmony_ci * 2. this test avoids hitting: 281f08c3bdfSopenharmony_ci * Bug 870326 - migrate_pages() reports success, but pages are 282f08c3bdfSopenharmony_ci * not moved to desired node 283f08c3bdfSopenharmony_ci * https://bugzilla.redhat.com/show_bug.cgi?id=870326 284f08c3bdfSopenharmony_ci */ 285f08c3bdfSopenharmony_ci nodeA = nodeB = -1; 286f08c3bdfSopenharmony_ci for (i = 0; i < num_nodes; i++) { 287f08c3bdfSopenharmony_ci p = numa_alloc_onnode(NODE_MIN_FREEMEM, nodes[i]); 288f08c3bdfSopenharmony_ci if (p == NULL) 289f08c3bdfSopenharmony_ci break; 290f08c3bdfSopenharmony_ci memset(p, 0xff, NODE_MIN_FREEMEM); 291f08c3bdfSopenharmony_ci 292f08c3bdfSopenharmony_ci j = 0; 293f08c3bdfSopenharmony_ci while (j < NODE_MIN_FREEMEM) { 294f08c3bdfSopenharmony_ci if (addr_on_node(p + j) != nodes[i]) 295f08c3bdfSopenharmony_ci break; 296f08c3bdfSopenharmony_ci j += pagesize; 297f08c3bdfSopenharmony_ci } 298f08c3bdfSopenharmony_ci numa_free(p, NODE_MIN_FREEMEM); 299f08c3bdfSopenharmony_ci 300f08c3bdfSopenharmony_ci if (j >= NODE_MIN_FREEMEM) { 301f08c3bdfSopenharmony_ci if (nodeA == -1) 302f08c3bdfSopenharmony_ci nodeA = nodes[i]; 303f08c3bdfSopenharmony_ci else if (nodeB == -1) 304f08c3bdfSopenharmony_ci nodeB = nodes[i]; 305f08c3bdfSopenharmony_ci else 306f08c3bdfSopenharmony_ci break; 307f08c3bdfSopenharmony_ci } 308f08c3bdfSopenharmony_ci } 309f08c3bdfSopenharmony_ci 310f08c3bdfSopenharmony_ci if (nodeA == -1 || nodeB == -1) 311f08c3bdfSopenharmony_ci tst_brk(TCONF, "at least 2 NUMA nodes with " 312f08c3bdfSopenharmony_ci "free mem > %d are needed", NODE_MIN_FREEMEM); 313f08c3bdfSopenharmony_ci tst_res(TINFO, "Using nodes: %d %d", nodeA, nodeB); 314f08c3bdfSopenharmony_ci 315f08c3bdfSopenharmony_ci ltpuser = getpwnam(nobody_uid); 316f08c3bdfSopenharmony_ci if (ltpuser == NULL) 317f08c3bdfSopenharmony_ci tst_brk(TBROK | TERRNO, "getpwnam failed"); 318f08c3bdfSopenharmony_ci} 319f08c3bdfSopenharmony_ci 320f08c3bdfSopenharmony_cistatic struct tst_test test = { 321f08c3bdfSopenharmony_ci .needs_root = 1, 322f08c3bdfSopenharmony_ci .needs_checkpoints = 1, 323f08c3bdfSopenharmony_ci .forks_child = 1, 324f08c3bdfSopenharmony_ci .test_all = run, 325f08c3bdfSopenharmony_ci .setup = setup, 326f08c3bdfSopenharmony_ci .save_restore = (const struct tst_path_val[]) { 327f08c3bdfSopenharmony_ci {"/proc/sys/kernel/numa_balancing", "0", 328f08c3bdfSopenharmony_ci TST_SR_SKIP_MISSING | TST_SR_TCONF_RO}, 329f08c3bdfSopenharmony_ci {} 330f08c3bdfSopenharmony_ci }, 331f08c3bdfSopenharmony_ci}; 332f08c3bdfSopenharmony_ci#else 333f08c3bdfSopenharmony_ciTST_TEST_TCONF(NUMA_ERROR_MSG); 334f08c3bdfSopenharmony_ci#endif 335