// SPDX-License-Identifier: GPL-2.0
// Copyright(c) 2018 Intel Corporation. All rights reserved.

#include <linux/mm.h>
#include <linux/init.h>
#include <linux/mmzone.h>
#include <linux/random.h>
#include <linux/moduleparam.h>
#include "internal.h"
#include "shuffle.h"

DEFINE_STATIC_KEY_FALSE(page_alloc_shuffle_key);

static bool shuffle_param;
static int shuffle_show(char *buffer, const struct kernel_param *kp)
{
	return sprintf(buffer, "%c\n", shuffle_param ? 'Y' : 'N');
}

static __meminit int shuffle_store(const char *val,
		const struct kernel_param *kp)
{
	int rc = param_set_bool(val, kp);

	if (rc < 0)
		return rc;
	if (shuffle_param)
		static_branch_enable(&page_alloc_shuffle_key);
	return 0;
}
module_param_call(shuffle, shuffle_store, shuffle_show, &shuffle_param, 0400);
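
/*
 * Usage sketch (illustrative, assuming the usual mm/Makefile arrangement that
 * links shuffle.o into the "page_alloc" module-parameter namespace): with
 * CONFIG_SHUFFLE_PAGE_ALLOCATOR built in, shuffling is enabled at boot with
 * "page_alloc.shuffle=1" on the kernel command line. The 0400 permission above
 * exposes the value read-only (to root) via
 * /sys/module/page_alloc/parameters/shuffle; it cannot be toggled at runtime.
 */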

/*
 * For two pages to be swapped in the shuffle, they must be free (on a
 * 'free_area' lru), have the same order, and have the same migratetype.
 */
static struct page * __meminit shuffle_valid_page(struct zone *zone,
						  unsigned long pfn, int order)
{
	struct page *page = pfn_to_online_page(pfn);

	/*
	 * Given we're dealing with randomly selected pfns in a zone we
	 * need to ask questions like...
	 */

	/* ... is the page managed by the buddy? */
	if (!page)
		return NULL;

	/* ... is the page assigned to the same zone? */
	if (page_zone(page) != zone)
		return NULL;

	/* ...is the page free and currently on a free_area list? */
	if (!PageBuddy(page))
		return NULL;

	/*
	 * ...is the page on the same list as the page we will
	 * shuffle it with?
	 */
	if (buddy_order(page) != order)
		return NULL;

	return page;
}

/*
 * Fisher-Yates shuffle the freelist, which prescribes iterating through an
 * array, pfns in this case, and randomly swapping each entry with another in
 * the span, end_pfn - start_pfn.
 *
 * To keep the implementation simple it does not attempt to correct for sources
 * of bias in the distribution, like modulo bias or pseudo-random number
 * generator bias. I.e. the expectation is that this shuffling raises the bar
 * for attacks that exploit the predictability of page allocations, but need not
 * be a perfect shuffle.
 */
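/*
 * Worked example (illustrative, assuming SHUFFLE_ORDER == MAX_ORDER - 1 == 10
 * and 4K pages, i.e. 1024-page / 4MiB candidate blocks): for a zone with
 * spanned_pages == 262144 (1GiB), get_random_long() % spanned_pages picks a
 * random offset and ALIGN_DOWN(..., 1024) rounds it down to one of 256
 * order-10 aligned slots; the pfn at that slot is the candidate swap target
 * page_j in the loop below.
 */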
#define SHUFFLE_RETRY 10
void __meminit __shuffle_zone(struct zone *z)
{
	unsigned long i, flags;
	unsigned long start_pfn = z->zone_start_pfn;
	unsigned long end_pfn = zone_end_pfn(z);
	const int order = SHUFFLE_ORDER;
	const int order_pages = 1 << order;

	spin_lock_irqsave(&z->lock, flags);
	start_pfn = ALIGN(start_pfn, order_pages);
	for (i = start_pfn; i < end_pfn; i += order_pages) {
		unsigned long j;
		int migratetype, retry;
		struct page *page_i, *page_j;

		/*
		 * We expect page_i, in the sub-range of a zone being added
		 * (@start_pfn to @end_pfn), to more likely be valid compared to
		 * page_j randomly selected in the span @zone_start_pfn to
		 * @spanned_pages.
		 */
		page_i = shuffle_valid_page(z, i, order);
		if (!page_i)
			continue;

		for (retry = 0; retry < SHUFFLE_RETRY; retry++) {
			/*
			 * Pick a random order aligned page in the zone span as
			 * a swap target. If the selected pfn is a hole, retry
			 * up to SHUFFLE_RETRY times to find a random valid pfn
			 * in the zone.
			 */
			j = z->zone_start_pfn +
				ALIGN_DOWN(get_random_long() % z->spanned_pages,
						order_pages);
			page_j = shuffle_valid_page(z, j, order);
			if (page_j && page_j != page_i)
				break;
		}
		if (retry >= SHUFFLE_RETRY) {
			pr_debug("%s: failed to swap %#lx\n", __func__, i);
			continue;
		}

		/*
		 * Each migratetype corresponds to its own list; make sure the
		 * types match, otherwise we're moving pages to lists where
		 * they do not belong.
		 */
		migratetype = get_pageblock_migratetype(page_i);
		if (get_pageblock_migratetype(page_j) != migratetype) {
			pr_debug("%s: migratetype mismatch %#lx\n", __func__, i);
			continue;
		}

		list_swap(&page_i->lru, &page_j->lru);

		pr_debug("%s: swap: %#lx -> %#lx\n", __func__, i, j);

		/* take it easy on the zone lock */
		if ((i % (100 * order_pages)) == 0) {
			spin_unlock_irqrestore(&z->lock, flags);
			cond_resched();
			spin_lock_irqsave(&z->lock, flags);
		}
	}
	spin_unlock_irqrestore(&z->lock, flags);
}

/**
 * shuffle_free_memory - reduce the predictability of the page allocator
 * @pgdat: node page data
 */
void __meminit __shuffle_free_memory(pg_data_t *pgdat)
{
	struct zone *z;

	for (z = pgdat->node_zones; z < pgdat->node_zones + MAX_NR_ZONES; z++)
		shuffle_zone(z);
}

bool shuffle_pick_tail(void)
{
	static u64 rand;
	static u8 rand_bits;
	bool ret;

	/*
	 * The lack of locking is deliberate. If two threads race to
	 * update the rand state it just adds to the entropy.
	 */
	if (rand_bits == 0) {
		rand_bits = 64;
		rand = get_random_u64();
	}

	ret = rand & 1;

	rand_bits--;
	rand >>= 1;

	return ret;
}
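
/*
 * Note (illustrative): in this kernel series shuffle_pick_tail() is intended
 * to be consumed by the buddy free path (__free_one_page() in
 * mm/page_alloc.c), which uses the coin flip to queue a freed page of
 * SHUFFLE_ORDER or above at either the head or the tail of its freelist,
 * preserving some entropy beyond the boot-time shuffle.
 */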