162306a36Sopenharmony_ci// SPDX-License-Identifier: GPL-2.0
262306a36Sopenharmony_ci/*
362306a36Sopenharmony_ci * KMSAN initialization routines.
462306a36Sopenharmony_ci *
562306a36Sopenharmony_ci * Copyright (C) 2017-2021 Google LLC
662306a36Sopenharmony_ci * Author: Alexander Potapenko <glider@google.com>
762306a36Sopenharmony_ci *
862306a36Sopenharmony_ci */
962306a36Sopenharmony_ci
1062306a36Sopenharmony_ci#include "kmsan.h"
1162306a36Sopenharmony_ci
1262306a36Sopenharmony_ci#include <asm/sections.h>
1362306a36Sopenharmony_ci#include <linux/mm.h>
1462306a36Sopenharmony_ci#include <linux/memblock.h>
1562306a36Sopenharmony_ci
1662306a36Sopenharmony_ci#include "../internal.h"
1762306a36Sopenharmony_ci
1862306a36Sopenharmony_ci#define NUM_FUTURE_RANGES 128
1962306a36Sopenharmony_cistruct start_end_pair {
2062306a36Sopenharmony_ci	u64 start, end;
2162306a36Sopenharmony_ci};
2262306a36Sopenharmony_ci
2362306a36Sopenharmony_cistatic struct start_end_pair start_end_pairs[NUM_FUTURE_RANGES] __initdata;
2462306a36Sopenharmony_cistatic int future_index __initdata;
2562306a36Sopenharmony_ci
2662306a36Sopenharmony_ci/*
2762306a36Sopenharmony_ci * Record a range of memory for which the metadata pages will be created once
2862306a36Sopenharmony_ci * the page allocator becomes available.
2962306a36Sopenharmony_ci */
3062306a36Sopenharmony_cistatic void __init kmsan_record_future_shadow_range(void *start, void *end)
3162306a36Sopenharmony_ci{
3262306a36Sopenharmony_ci	u64 nstart = (u64)start, nend = (u64)end, cstart, cend;
3362306a36Sopenharmony_ci	bool merged = false;
3462306a36Sopenharmony_ci
3562306a36Sopenharmony_ci	KMSAN_WARN_ON(future_index == NUM_FUTURE_RANGES);
3662306a36Sopenharmony_ci	KMSAN_WARN_ON((nstart >= nend) || !nstart || !nend);
3762306a36Sopenharmony_ci	nstart = ALIGN_DOWN(nstart, PAGE_SIZE);
3862306a36Sopenharmony_ci	nend = ALIGN(nend, PAGE_SIZE);
3962306a36Sopenharmony_ci
4062306a36Sopenharmony_ci	/*
4162306a36Sopenharmony_ci	 * Scan the existing ranges to see if any of them overlaps with
4262306a36Sopenharmony_ci	 * [start, end). In that case, merge the two ranges instead of
4362306a36Sopenharmony_ci	 * creating a new one.
4462306a36Sopenharmony_ci	 * The number of ranges is less than 20, so there is no need to organize
4562306a36Sopenharmony_ci	 * them into a more intelligent data structure.
4662306a36Sopenharmony_ci	 */
4762306a36Sopenharmony_ci	for (int i = 0; i < future_index; i++) {
4862306a36Sopenharmony_ci		cstart = start_end_pairs[i].start;
4962306a36Sopenharmony_ci		cend = start_end_pairs[i].end;
5062306a36Sopenharmony_ci		if ((cstart < nstart && cend < nstart) ||
5162306a36Sopenharmony_ci		    (cstart > nend && cend > nend))
5262306a36Sopenharmony_ci			/* ranges are disjoint - do not merge */
5362306a36Sopenharmony_ci			continue;
5462306a36Sopenharmony_ci		start_end_pairs[i].start = min(nstart, cstart);
5562306a36Sopenharmony_ci		start_end_pairs[i].end = max(nend, cend);
5662306a36Sopenharmony_ci		merged = true;
5762306a36Sopenharmony_ci		break;
5862306a36Sopenharmony_ci	}
5962306a36Sopenharmony_ci	if (merged)
6062306a36Sopenharmony_ci		return;
6162306a36Sopenharmony_ci	start_end_pairs[future_index].start = nstart;
6262306a36Sopenharmony_ci	start_end_pairs[future_index].end = nend;
6362306a36Sopenharmony_ci	future_index++;
6462306a36Sopenharmony_ci}
6562306a36Sopenharmony_ci
6662306a36Sopenharmony_ci/*
6762306a36Sopenharmony_ci * Initialize the shadow for existing mappings during kernel initialization.
6862306a36Sopenharmony_ci * These include kernel text/data sections, NODE_DATA and future ranges
6962306a36Sopenharmony_ci * registered while creating other data (e.g. percpu).
7062306a36Sopenharmony_ci *
7162306a36Sopenharmony_ci * Allocations via memblock can be only done before slab is initialized.
7262306a36Sopenharmony_ci */
7362306a36Sopenharmony_civoid __init kmsan_init_shadow(void)
7462306a36Sopenharmony_ci{
7562306a36Sopenharmony_ci	const size_t nd_size = roundup(sizeof(pg_data_t), PAGE_SIZE);
7662306a36Sopenharmony_ci	phys_addr_t p_start, p_end;
7762306a36Sopenharmony_ci	u64 loop;
7862306a36Sopenharmony_ci	int nid;
7962306a36Sopenharmony_ci
8062306a36Sopenharmony_ci	for_each_reserved_mem_range(loop, &p_start, &p_end)
8162306a36Sopenharmony_ci		kmsan_record_future_shadow_range(phys_to_virt(p_start),
8262306a36Sopenharmony_ci						 phys_to_virt(p_end));
8362306a36Sopenharmony_ci	/* Allocate shadow for .data */
8462306a36Sopenharmony_ci	kmsan_record_future_shadow_range(_sdata, _edata);
8562306a36Sopenharmony_ci
8662306a36Sopenharmony_ci	for_each_online_node(nid)
8762306a36Sopenharmony_ci		kmsan_record_future_shadow_range(
8862306a36Sopenharmony_ci			NODE_DATA(nid), (char *)NODE_DATA(nid) + nd_size);
8962306a36Sopenharmony_ci
9062306a36Sopenharmony_ci	for (int i = 0; i < future_index; i++)
9162306a36Sopenharmony_ci		kmsan_init_alloc_meta_for_range(
9262306a36Sopenharmony_ci			(void *)start_end_pairs[i].start,
9362306a36Sopenharmony_ci			(void *)start_end_pairs[i].end);
9462306a36Sopenharmony_ci}
9562306a36Sopenharmony_ci
9662306a36Sopenharmony_cistruct metadata_page_pair {
9762306a36Sopenharmony_ci	struct page *shadow, *origin;
9862306a36Sopenharmony_ci};
9962306a36Sopenharmony_cistatic struct metadata_page_pair held_back[MAX_ORDER + 1] __initdata;
10062306a36Sopenharmony_ci
10162306a36Sopenharmony_ci/*
10262306a36Sopenharmony_ci * Eager metadata allocation. When the memblock allocator is freeing pages to
10362306a36Sopenharmony_ci * pagealloc, we use 2/3 of them as metadata for the remaining 1/3.
10462306a36Sopenharmony_ci * We store the pointers to the returned blocks of pages in held_back[] grouped
10562306a36Sopenharmony_ci * by their order: when kmsan_memblock_free_pages() is called for the first
10662306a36Sopenharmony_ci * time with a certain order, it is reserved as a shadow block, for the second
10762306a36Sopenharmony_ci * time - as an origin block. On the third time the incoming block receives its
10862306a36Sopenharmony_ci * shadow and origin ranges from the previously saved shadow and origin blocks,
10962306a36Sopenharmony_ci * after which held_back[order] can be used again.
11062306a36Sopenharmony_ci *
11162306a36Sopenharmony_ci * At the very end there may be leftover blocks in held_back[]. They are
11262306a36Sopenharmony_ci * collected later by kmsan_memblock_discard().
11362306a36Sopenharmony_ci */
11462306a36Sopenharmony_cibool kmsan_memblock_free_pages(struct page *page, unsigned int order)
11562306a36Sopenharmony_ci{
11662306a36Sopenharmony_ci	struct page *shadow, *origin;
11762306a36Sopenharmony_ci
11862306a36Sopenharmony_ci	if (!held_back[order].shadow) {
11962306a36Sopenharmony_ci		held_back[order].shadow = page;
12062306a36Sopenharmony_ci		return false;
12162306a36Sopenharmony_ci	}
12262306a36Sopenharmony_ci	if (!held_back[order].origin) {
12362306a36Sopenharmony_ci		held_back[order].origin = page;
12462306a36Sopenharmony_ci		return false;
12562306a36Sopenharmony_ci	}
12662306a36Sopenharmony_ci	shadow = held_back[order].shadow;
12762306a36Sopenharmony_ci	origin = held_back[order].origin;
12862306a36Sopenharmony_ci	kmsan_setup_meta(page, shadow, origin, order);
12962306a36Sopenharmony_ci
13062306a36Sopenharmony_ci	held_back[order].shadow = NULL;
13162306a36Sopenharmony_ci	held_back[order].origin = NULL;
13262306a36Sopenharmony_ci	return true;
13362306a36Sopenharmony_ci}
13462306a36Sopenharmony_ci
13562306a36Sopenharmony_ci#define MAX_BLOCKS 8
13662306a36Sopenharmony_cistruct smallstack {
13762306a36Sopenharmony_ci	struct page *items[MAX_BLOCKS];
13862306a36Sopenharmony_ci	int index;
13962306a36Sopenharmony_ci	int order;
14062306a36Sopenharmony_ci};
14162306a36Sopenharmony_ci
14262306a36Sopenharmony_cistatic struct smallstack collect = {
14362306a36Sopenharmony_ci	.index = 0,
14462306a36Sopenharmony_ci	.order = MAX_ORDER,
14562306a36Sopenharmony_ci};
14662306a36Sopenharmony_ci
14762306a36Sopenharmony_cistatic void smallstack_push(struct smallstack *stack, struct page *pages)
14862306a36Sopenharmony_ci{
14962306a36Sopenharmony_ci	KMSAN_WARN_ON(stack->index == MAX_BLOCKS);
15062306a36Sopenharmony_ci	stack->items[stack->index] = pages;
15162306a36Sopenharmony_ci	stack->index++;
15262306a36Sopenharmony_ci}
15362306a36Sopenharmony_ci#undef MAX_BLOCKS
15462306a36Sopenharmony_ci
15562306a36Sopenharmony_cistatic struct page *smallstack_pop(struct smallstack *stack)
15662306a36Sopenharmony_ci{
15762306a36Sopenharmony_ci	struct page *ret;
15862306a36Sopenharmony_ci
15962306a36Sopenharmony_ci	KMSAN_WARN_ON(stack->index == 0);
16062306a36Sopenharmony_ci	stack->index--;
16162306a36Sopenharmony_ci	ret = stack->items[stack->index];
16262306a36Sopenharmony_ci	stack->items[stack->index] = NULL;
16362306a36Sopenharmony_ci	return ret;
16462306a36Sopenharmony_ci}
16562306a36Sopenharmony_ci
16662306a36Sopenharmony_cistatic void do_collection(void)
16762306a36Sopenharmony_ci{
16862306a36Sopenharmony_ci	struct page *page, *shadow, *origin;
16962306a36Sopenharmony_ci
17062306a36Sopenharmony_ci	while (collect.index >= 3) {
17162306a36Sopenharmony_ci		page = smallstack_pop(&collect);
17262306a36Sopenharmony_ci		shadow = smallstack_pop(&collect);
17362306a36Sopenharmony_ci		origin = smallstack_pop(&collect);
17462306a36Sopenharmony_ci		kmsan_setup_meta(page, shadow, origin, collect.order);
17562306a36Sopenharmony_ci		__free_pages_core(page, collect.order);
17662306a36Sopenharmony_ci	}
17762306a36Sopenharmony_ci}
17862306a36Sopenharmony_ci
17962306a36Sopenharmony_cistatic void collect_split(void)
18062306a36Sopenharmony_ci{
18162306a36Sopenharmony_ci	struct smallstack tmp = {
18262306a36Sopenharmony_ci		.order = collect.order - 1,
18362306a36Sopenharmony_ci		.index = 0,
18462306a36Sopenharmony_ci	};
18562306a36Sopenharmony_ci	struct page *page;
18662306a36Sopenharmony_ci
18762306a36Sopenharmony_ci	if (!collect.order)
18862306a36Sopenharmony_ci		return;
18962306a36Sopenharmony_ci	while (collect.index) {
19062306a36Sopenharmony_ci		page = smallstack_pop(&collect);
19162306a36Sopenharmony_ci		smallstack_push(&tmp, &page[0]);
19262306a36Sopenharmony_ci		smallstack_push(&tmp, &page[1 << tmp.order]);
19362306a36Sopenharmony_ci	}
19462306a36Sopenharmony_ci	__memcpy(&collect, &tmp, sizeof(tmp));
19562306a36Sopenharmony_ci}
19662306a36Sopenharmony_ci
19762306a36Sopenharmony_ci/*
19862306a36Sopenharmony_ci * Memblock is about to go away. Split the page blocks left over in held_back[]
19962306a36Sopenharmony_ci * and return 1/3 of that memory to the system.
20062306a36Sopenharmony_ci */
20162306a36Sopenharmony_cistatic void kmsan_memblock_discard(void)
20262306a36Sopenharmony_ci{
20362306a36Sopenharmony_ci	/*
20462306a36Sopenharmony_ci	 * For each order=N:
20562306a36Sopenharmony_ci	 *  - push held_back[N].shadow and .origin to @collect;
20662306a36Sopenharmony_ci	 *  - while there are >= 3 elements in @collect, do garbage collection:
20762306a36Sopenharmony_ci	 *    - pop 3 ranges from @collect;
20862306a36Sopenharmony_ci	 *    - use two of them as shadow and origin for the third one;
20962306a36Sopenharmony_ci	 *    - repeat;
21062306a36Sopenharmony_ci	 *  - split each remaining element from @collect into 2 ranges of
21162306a36Sopenharmony_ci	 *    order=N-1,
21262306a36Sopenharmony_ci	 *  - repeat.
21362306a36Sopenharmony_ci	 */
21462306a36Sopenharmony_ci	collect.order = MAX_ORDER;
21562306a36Sopenharmony_ci	for (int i = MAX_ORDER; i >= 0; i--) {
21662306a36Sopenharmony_ci		if (held_back[i].shadow)
21762306a36Sopenharmony_ci			smallstack_push(&collect, held_back[i].shadow);
21862306a36Sopenharmony_ci		if (held_back[i].origin)
21962306a36Sopenharmony_ci			smallstack_push(&collect, held_back[i].origin);
22062306a36Sopenharmony_ci		held_back[i].shadow = NULL;
22162306a36Sopenharmony_ci		held_back[i].origin = NULL;
22262306a36Sopenharmony_ci		do_collection();
22362306a36Sopenharmony_ci		collect_split();
22462306a36Sopenharmony_ci	}
22562306a36Sopenharmony_ci}
22662306a36Sopenharmony_ci
22762306a36Sopenharmony_civoid __init kmsan_init_runtime(void)
22862306a36Sopenharmony_ci{
22962306a36Sopenharmony_ci	/* Assuming current is init_task */
23062306a36Sopenharmony_ci	kmsan_internal_task_create(current);
23162306a36Sopenharmony_ci	kmsan_memblock_discard();
23262306a36Sopenharmony_ci	pr_info("Starting KernelMemorySanitizer\n");
23362306a36Sopenharmony_ci	pr_info("ATTENTION: KMSAN is a debugging tool! Do not use it on production machines!\n");
23462306a36Sopenharmony_ci	kmsan_enabled = true;
23562306a36Sopenharmony_ci}
236