162306a36Sopenharmony_ci// SPDX-License-Identifier: GPL-2.0
262306a36Sopenharmony_ci/*
362306a36Sopenharmony_ci * KVM dirty page logging test
462306a36Sopenharmony_ci *
562306a36Sopenharmony_ci * Copyright (C) 2018, Red Hat, Inc.
662306a36Sopenharmony_ci */
762306a36Sopenharmony_ci
862306a36Sopenharmony_ci#define _GNU_SOURCE /* for program_invocation_name */
962306a36Sopenharmony_ci
1062306a36Sopenharmony_ci#include <stdio.h>
1162306a36Sopenharmony_ci#include <stdlib.h>
1262306a36Sopenharmony_ci#include <pthread.h>
1362306a36Sopenharmony_ci#include <semaphore.h>
1462306a36Sopenharmony_ci#include <sys/types.h>
1562306a36Sopenharmony_ci#include <signal.h>
1662306a36Sopenharmony_ci#include <errno.h>
1762306a36Sopenharmony_ci#include <linux/bitmap.h>
1862306a36Sopenharmony_ci#include <linux/bitops.h>
1962306a36Sopenharmony_ci#include <linux/atomic.h>
2062306a36Sopenharmony_ci#include <asm/barrier.h>
2162306a36Sopenharmony_ci
2262306a36Sopenharmony_ci#include "kvm_util.h"
2362306a36Sopenharmony_ci#include "test_util.h"
2462306a36Sopenharmony_ci#include "guest_modes.h"
2562306a36Sopenharmony_ci#include "processor.h"
2662306a36Sopenharmony_ci
/* log2 of the size of the memory region being dirtied (2^30 bytes = 1G) */
#define DIRTY_MEM_BITS 30
/* log2 of a 4K page */
#define PAGE_SHIFT_4K  12

/* Index of the memory slot whose dirty pages are tracked */
#define TEST_MEM_SLOT_INDEX		1

/* Guest virtual address the test memory is mapped at unless overridden */
#define DEFAULT_GUEST_TEST_MEM		0xc0000000

/* Number of page writes the guest performs per loop (between two syncs) */
#define TEST_PAGES_PER_LOOP		1024

/* Number of host loops; each one issues a single KVM_GET_DIRTY_LOG */
#define TEST_HOST_LOOP_N		32UL

/* Delay between two host loops, in milliseconds */
#define TEST_HOST_LOOP_INTERVAL		10UL
4462306a36Sopenharmony_ci
/*
 * Dirty bitmaps are always laid out in little-endian bit order.  On the
 * big-endian s390x the bit number is swizzled before it is handed to the
 * native bitops; on every other architecture the *_le helpers are plain
 * aliases of the native ones.
 */
#ifdef __s390x__
# define BITOP_LE_SWIZZLE	((BITS_PER_LONG-1) & ~0x7)
# define test_bit_le(bit, map) \
	test_bit((bit) ^ BITOP_LE_SWIZZLE, map)
# define __set_bit_le(bit, map) \
	__set_bit((bit) ^ BITOP_LE_SWIZZLE, map)
# define __clear_bit_le(bit, map) \
	__clear_bit((bit) ^ BITOP_LE_SWIZZLE, map)
# define __test_and_set_bit_le(bit, map) \
	__test_and_set_bit((bit) ^ BITOP_LE_SWIZZLE, map)
# define __test_and_clear_bit_le(bit, map) \
	__test_and_clear_bit((bit) ^ BITOP_LE_SWIZZLE, map)
#else
# define test_bit_le			test_bit
# define __set_bit_le			__set_bit
# define __clear_bit_le			__clear_bit
# define __test_and_set_bit_le		__test_and_set_bit
# define __test_and_clear_bit_le	__test_and_clear_bit
#endif
6562306a36Sopenharmony_ci
/* Default number of dirty ring entries requested by the test */
#define TEST_DIRTY_RING_COUNT		65536

/* Signal used to kick the vcpu thread */
#define SIG_IPI SIGUSR1
6962306a36Sopenharmony_ci
7062306a36Sopenharmony_ci/*
7162306a36Sopenharmony_ci * Guest/Host shared variables. Ensure addr_gva2hva() and/or
7262306a36Sopenharmony_ci * sync_global_to/from_guest() are used when accessing from
7362306a36Sopenharmony_ci * the host. READ/WRITE_ONCE() should also be used with anything
7462306a36Sopenharmony_ci * that may change.
7562306a36Sopenharmony_ci */
7662306a36Sopenharmony_cistatic uint64_t host_page_size;
7762306a36Sopenharmony_cistatic uint64_t guest_page_size;
7862306a36Sopenharmony_cistatic uint64_t guest_num_pages;
7962306a36Sopenharmony_cistatic uint64_t random_array[TEST_PAGES_PER_LOOP];
8062306a36Sopenharmony_cistatic uint64_t iteration;
8162306a36Sopenharmony_ci
8262306a36Sopenharmony_ci/*
8362306a36Sopenharmony_ci * Guest physical memory offset of the testing memory slot.
8462306a36Sopenharmony_ci * This will be set to the topmost valid physical address minus
8562306a36Sopenharmony_ci * the test memory size.
8662306a36Sopenharmony_ci */
8762306a36Sopenharmony_cistatic uint64_t guest_test_phys_mem;
8862306a36Sopenharmony_ci
8962306a36Sopenharmony_ci/*
9062306a36Sopenharmony_ci * Guest virtual memory offset of the testing memory slot.
9162306a36Sopenharmony_ci * Must not conflict with identity mapped test code.
9262306a36Sopenharmony_ci */
9362306a36Sopenharmony_cistatic uint64_t guest_test_virt_mem = DEFAULT_GUEST_TEST_MEM;
9462306a36Sopenharmony_ci
9562306a36Sopenharmony_ci/*
9662306a36Sopenharmony_ci * Continuously write to the first 8 bytes of a random pages within
9762306a36Sopenharmony_ci * the testing memory region.
9862306a36Sopenharmony_ci */
9962306a36Sopenharmony_cistatic void guest_code(void)
10062306a36Sopenharmony_ci{
10162306a36Sopenharmony_ci	uint64_t addr;
10262306a36Sopenharmony_ci	int i;
10362306a36Sopenharmony_ci
10462306a36Sopenharmony_ci	/*
10562306a36Sopenharmony_ci	 * On s390x, all pages of a 1M segment are initially marked as dirty
10662306a36Sopenharmony_ci	 * when a page of the segment is written to for the very first time.
10762306a36Sopenharmony_ci	 * To compensate this specialty in this test, we need to touch all
10862306a36Sopenharmony_ci	 * pages during the first iteration.
10962306a36Sopenharmony_ci	 */
11062306a36Sopenharmony_ci	for (i = 0; i < guest_num_pages; i++) {
11162306a36Sopenharmony_ci		addr = guest_test_virt_mem + i * guest_page_size;
11262306a36Sopenharmony_ci		*(uint64_t *)addr = READ_ONCE(iteration);
11362306a36Sopenharmony_ci	}
11462306a36Sopenharmony_ci
11562306a36Sopenharmony_ci	while (true) {
11662306a36Sopenharmony_ci		for (i = 0; i < TEST_PAGES_PER_LOOP; i++) {
11762306a36Sopenharmony_ci			addr = guest_test_virt_mem;
11862306a36Sopenharmony_ci			addr += (READ_ONCE(random_array[i]) % guest_num_pages)
11962306a36Sopenharmony_ci				* guest_page_size;
12062306a36Sopenharmony_ci			addr = align_down(addr, host_page_size);
12162306a36Sopenharmony_ci			*(uint64_t *)addr = READ_ONCE(iteration);
12262306a36Sopenharmony_ci		}
12362306a36Sopenharmony_ci
12462306a36Sopenharmony_ci		/* Tell the host that we need more random numbers */
12562306a36Sopenharmony_ci		GUEST_SYNC(1);
12662306a36Sopenharmony_ci	}
12762306a36Sopenharmony_ci}
12862306a36Sopenharmony_ci
12962306a36Sopenharmony_ci/* Host variables */
13062306a36Sopenharmony_cistatic bool host_quit;
13162306a36Sopenharmony_ci
13262306a36Sopenharmony_ci/* Points to the test VM memory region on which we track dirty logs */
13362306a36Sopenharmony_cistatic void *host_test_mem;
13462306a36Sopenharmony_cistatic uint64_t host_num_pages;
13562306a36Sopenharmony_ci
13662306a36Sopenharmony_ci/* For statistics only */
13762306a36Sopenharmony_cistatic uint64_t host_dirty_count;
13862306a36Sopenharmony_cistatic uint64_t host_clear_count;
13962306a36Sopenharmony_cistatic uint64_t host_track_next_count;
14062306a36Sopenharmony_ci
14162306a36Sopenharmony_ci/* Whether dirty ring reset is requested, or finished */
14262306a36Sopenharmony_cistatic sem_t sem_vcpu_stop;
14362306a36Sopenharmony_cistatic sem_t sem_vcpu_cont;
14462306a36Sopenharmony_ci/*
14562306a36Sopenharmony_ci * This is only set by main thread, and only cleared by vcpu thread.  It is
14662306a36Sopenharmony_ci * used to request vcpu thread to stop at the next GUEST_SYNC, since GUEST_SYNC
14762306a36Sopenharmony_ci * is the only place that we'll guarantee both "dirty bit" and "dirty data"
14862306a36Sopenharmony_ci * will match.  E.g., SIG_IPI won't guarantee that if the vcpu is interrupted
14962306a36Sopenharmony_ci * after setting dirty bit but before the data is written.
15062306a36Sopenharmony_ci */
15162306a36Sopenharmony_cistatic atomic_t vcpu_sync_stop_requested;
15262306a36Sopenharmony_ci/*
15362306a36Sopenharmony_ci * This is updated by the vcpu thread to tell the host whether it's a
15462306a36Sopenharmony_ci * ring-full event.  It should only be read until a sem_wait() of
15562306a36Sopenharmony_ci * sem_vcpu_stop and before vcpu continues to run.
15662306a36Sopenharmony_ci */
15762306a36Sopenharmony_cistatic bool dirty_ring_vcpu_ring_full;
15862306a36Sopenharmony_ci/*
15962306a36Sopenharmony_ci * This is only used for verifying the dirty pages.  Dirty ring has a very
16062306a36Sopenharmony_ci * tricky case when the ring just got full, kvm will do userspace exit due to
16162306a36Sopenharmony_ci * ring full.  When that happens, the very last PFN is set but actually the
16262306a36Sopenharmony_ci * data is not changed (the guest WRITE is not really applied yet), because
16362306a36Sopenharmony_ci * we found that the dirty ring is full, refused to continue the vcpu, and
16462306a36Sopenharmony_ci * recorded the dirty gfn with the old contents.
16562306a36Sopenharmony_ci *
16662306a36Sopenharmony_ci * For this specific case, it's safe to skip checking this pfn for this
16762306a36Sopenharmony_ci * bit, because it's a redundant bit, and when the write happens later the bit
16862306a36Sopenharmony_ci * will be set again.  We use this variable to always keep track of the latest
16962306a36Sopenharmony_ci * dirty gfn we've collected, so that if a mismatch of data found later in the
17062306a36Sopenharmony_ci * verifying process, we let it pass.
17162306a36Sopenharmony_ci */
17262306a36Sopenharmony_cistatic uint64_t dirty_ring_last_page;
17362306a36Sopenharmony_ci
/* Dirty logging mechanisms the test can exercise; also indexes log_modes[] */
enum log_mode_t {
	/* Logging through KVM_GET_DIRTY_LOG only */
	LOG_MODE_DIRTY_LOG = 0,

	/* Logging through KVM_GET_DIRTY_LOG plus KVM_CLEAR_DIRTY_LOG */
	LOG_MODE_CLEAR_LOG = 1,

	/* Logging through the dirty ring */
	LOG_MODE_DIRTY_RING = 2,

	/* Number of real modes above */
	LOG_MODE_NUM,

	/* Pseudo mode: run every supported mode */
	LOG_MODE_ALL = LOG_MODE_NUM,
};
18962306a36Sopenharmony_ci
19062306a36Sopenharmony_ci/* Mode of logging to test.  Default is to run all supported modes */
19162306a36Sopenharmony_cistatic enum log_mode_t host_log_mode_option = LOG_MODE_ALL;
19262306a36Sopenharmony_ci/* Logging mode for current run */
19362306a36Sopenharmony_cistatic enum log_mode_t host_log_mode;
19462306a36Sopenharmony_cistatic pthread_t vcpu_thread;
19562306a36Sopenharmony_cistatic uint32_t test_dirty_ring_count = TEST_DIRTY_RING_COUNT;
19662306a36Sopenharmony_ci
19762306a36Sopenharmony_cistatic void vcpu_kick(void)
19862306a36Sopenharmony_ci{
19962306a36Sopenharmony_ci	pthread_kill(vcpu_thread, SIG_IPI);
20062306a36Sopenharmony_ci}
20162306a36Sopenharmony_ci
/*
 * This test plays tricks with signals, so plain sem_wait() may return
 * early with EINTR.  Keep retrying for as long as that is the reason
 * sem_wait() failed; any other outcome ends the wait.
 */
static void sem_wait_until(sem_t *sem)
{
	for (;;) {
		if (sem_wait(sem) != -1)
			return;
		if (errno != EINTR)
			return;
	}
}
21462306a36Sopenharmony_ci
21562306a36Sopenharmony_cistatic bool clear_log_supported(void)
21662306a36Sopenharmony_ci{
21762306a36Sopenharmony_ci	return kvm_has_cap(KVM_CAP_MANUAL_DIRTY_LOG_PROTECT2);
21862306a36Sopenharmony_ci}
21962306a36Sopenharmony_ci
22062306a36Sopenharmony_cistatic void clear_log_create_vm_done(struct kvm_vm *vm)
22162306a36Sopenharmony_ci{
22262306a36Sopenharmony_ci	u64 manual_caps;
22362306a36Sopenharmony_ci
22462306a36Sopenharmony_ci	manual_caps = kvm_check_cap(KVM_CAP_MANUAL_DIRTY_LOG_PROTECT2);
22562306a36Sopenharmony_ci	TEST_ASSERT(manual_caps, "MANUAL_CAPS is zero!");
22662306a36Sopenharmony_ci	manual_caps &= (KVM_DIRTY_LOG_MANUAL_PROTECT_ENABLE |
22762306a36Sopenharmony_ci			KVM_DIRTY_LOG_INITIALLY_SET);
22862306a36Sopenharmony_ci	vm_enable_cap(vm, KVM_CAP_MANUAL_DIRTY_LOG_PROTECT2, manual_caps);
22962306a36Sopenharmony_ci}
23062306a36Sopenharmony_ci
23162306a36Sopenharmony_cistatic void dirty_log_collect_dirty_pages(struct kvm_vcpu *vcpu, int slot,
23262306a36Sopenharmony_ci					  void *bitmap, uint32_t num_pages,
23362306a36Sopenharmony_ci					  uint32_t *unused)
23462306a36Sopenharmony_ci{
23562306a36Sopenharmony_ci	kvm_vm_get_dirty_log(vcpu->vm, slot, bitmap);
23662306a36Sopenharmony_ci}
23762306a36Sopenharmony_ci
23862306a36Sopenharmony_cistatic void clear_log_collect_dirty_pages(struct kvm_vcpu *vcpu, int slot,
23962306a36Sopenharmony_ci					  void *bitmap, uint32_t num_pages,
24062306a36Sopenharmony_ci					  uint32_t *unused)
24162306a36Sopenharmony_ci{
24262306a36Sopenharmony_ci	kvm_vm_get_dirty_log(vcpu->vm, slot, bitmap);
24362306a36Sopenharmony_ci	kvm_vm_clear_dirty_log(vcpu->vm, slot, bitmap, 0, num_pages);
24462306a36Sopenharmony_ci}
24562306a36Sopenharmony_ci
24662306a36Sopenharmony_ci/* Should only be called after a GUEST_SYNC */
24762306a36Sopenharmony_cistatic void vcpu_handle_sync_stop(void)
24862306a36Sopenharmony_ci{
24962306a36Sopenharmony_ci	if (atomic_read(&vcpu_sync_stop_requested)) {
25062306a36Sopenharmony_ci		/* It means main thread is sleeping waiting */
25162306a36Sopenharmony_ci		atomic_set(&vcpu_sync_stop_requested, false);
25262306a36Sopenharmony_ci		sem_post(&sem_vcpu_stop);
25362306a36Sopenharmony_ci		sem_wait_until(&sem_vcpu_cont);
25462306a36Sopenharmony_ci	}
25562306a36Sopenharmony_ci}
25662306a36Sopenharmony_ci
25762306a36Sopenharmony_cistatic void default_after_vcpu_run(struct kvm_vcpu *vcpu, int ret, int err)
25862306a36Sopenharmony_ci{
25962306a36Sopenharmony_ci	struct kvm_run *run = vcpu->run;
26062306a36Sopenharmony_ci
26162306a36Sopenharmony_ci	TEST_ASSERT(ret == 0 || (ret == -1 && err == EINTR),
26262306a36Sopenharmony_ci		    "vcpu run failed: errno=%d", err);
26362306a36Sopenharmony_ci
26462306a36Sopenharmony_ci	TEST_ASSERT(get_ucall(vcpu, NULL) == UCALL_SYNC,
26562306a36Sopenharmony_ci		    "Invalid guest sync status: exit_reason=%s\n",
26662306a36Sopenharmony_ci		    exit_reason_str(run->exit_reason));
26762306a36Sopenharmony_ci
26862306a36Sopenharmony_ci	vcpu_handle_sync_stop();
26962306a36Sopenharmony_ci}
27062306a36Sopenharmony_ci
27162306a36Sopenharmony_cistatic bool dirty_ring_supported(void)
27262306a36Sopenharmony_ci{
27362306a36Sopenharmony_ci	return (kvm_has_cap(KVM_CAP_DIRTY_LOG_RING) ||
27462306a36Sopenharmony_ci		kvm_has_cap(KVM_CAP_DIRTY_LOG_RING_ACQ_REL));
27562306a36Sopenharmony_ci}
27662306a36Sopenharmony_ci
27762306a36Sopenharmony_cistatic void dirty_ring_create_vm_done(struct kvm_vm *vm)
27862306a36Sopenharmony_ci{
27962306a36Sopenharmony_ci	uint64_t pages;
28062306a36Sopenharmony_ci	uint32_t limit;
28162306a36Sopenharmony_ci
28262306a36Sopenharmony_ci	/*
28362306a36Sopenharmony_ci	 * We rely on vcpu exit due to full dirty ring state. Adjust
28462306a36Sopenharmony_ci	 * the ring buffer size to ensure we're able to reach the
28562306a36Sopenharmony_ci	 * full dirty ring state.
28662306a36Sopenharmony_ci	 */
28762306a36Sopenharmony_ci	pages = (1ul << (DIRTY_MEM_BITS - vm->page_shift)) + 3;
28862306a36Sopenharmony_ci	pages = vm_adjust_num_guest_pages(vm->mode, pages);
28962306a36Sopenharmony_ci	if (vm->page_size < getpagesize())
29062306a36Sopenharmony_ci		pages = vm_num_host_pages(vm->mode, pages);
29162306a36Sopenharmony_ci
29262306a36Sopenharmony_ci	limit = 1 << (31 - __builtin_clz(pages));
29362306a36Sopenharmony_ci	test_dirty_ring_count = 1 << (31 - __builtin_clz(test_dirty_ring_count));
29462306a36Sopenharmony_ci	test_dirty_ring_count = min(limit, test_dirty_ring_count);
29562306a36Sopenharmony_ci	pr_info("dirty ring count: 0x%x\n", test_dirty_ring_count);
29662306a36Sopenharmony_ci
29762306a36Sopenharmony_ci	/*
29862306a36Sopenharmony_ci	 * Switch to dirty ring mode after VM creation but before any
29962306a36Sopenharmony_ci	 * of the vcpu creation.
30062306a36Sopenharmony_ci	 */
30162306a36Sopenharmony_ci	vm_enable_dirty_ring(vm, test_dirty_ring_count *
30262306a36Sopenharmony_ci			     sizeof(struct kvm_dirty_gfn));
30362306a36Sopenharmony_ci}
30462306a36Sopenharmony_ci
30562306a36Sopenharmony_cistatic inline bool dirty_gfn_is_dirtied(struct kvm_dirty_gfn *gfn)
30662306a36Sopenharmony_ci{
30762306a36Sopenharmony_ci	return smp_load_acquire(&gfn->flags) == KVM_DIRTY_GFN_F_DIRTY;
30862306a36Sopenharmony_ci}
30962306a36Sopenharmony_ci
31062306a36Sopenharmony_cistatic inline void dirty_gfn_set_collected(struct kvm_dirty_gfn *gfn)
31162306a36Sopenharmony_ci{
31262306a36Sopenharmony_ci	smp_store_release(&gfn->flags, KVM_DIRTY_GFN_F_RESET);
31362306a36Sopenharmony_ci}
31462306a36Sopenharmony_ci
31562306a36Sopenharmony_cistatic uint32_t dirty_ring_collect_one(struct kvm_dirty_gfn *dirty_gfns,
31662306a36Sopenharmony_ci				       int slot, void *bitmap,
31762306a36Sopenharmony_ci				       uint32_t num_pages, uint32_t *fetch_index)
31862306a36Sopenharmony_ci{
31962306a36Sopenharmony_ci	struct kvm_dirty_gfn *cur;
32062306a36Sopenharmony_ci	uint32_t count = 0;
32162306a36Sopenharmony_ci
32262306a36Sopenharmony_ci	while (true) {
32362306a36Sopenharmony_ci		cur = &dirty_gfns[*fetch_index % test_dirty_ring_count];
32462306a36Sopenharmony_ci		if (!dirty_gfn_is_dirtied(cur))
32562306a36Sopenharmony_ci			break;
32662306a36Sopenharmony_ci		TEST_ASSERT(cur->slot == slot, "Slot number didn't match: "
32762306a36Sopenharmony_ci			    "%u != %u", cur->slot, slot);
32862306a36Sopenharmony_ci		TEST_ASSERT(cur->offset < num_pages, "Offset overflow: "
32962306a36Sopenharmony_ci			    "0x%llx >= 0x%x", cur->offset, num_pages);
33062306a36Sopenharmony_ci		//pr_info("fetch 0x%x page %llu\n", *fetch_index, cur->offset);
33162306a36Sopenharmony_ci		__set_bit_le(cur->offset, bitmap);
33262306a36Sopenharmony_ci		dirty_ring_last_page = cur->offset;
33362306a36Sopenharmony_ci		dirty_gfn_set_collected(cur);
33462306a36Sopenharmony_ci		(*fetch_index)++;
33562306a36Sopenharmony_ci		count++;
33662306a36Sopenharmony_ci	}
33762306a36Sopenharmony_ci
33862306a36Sopenharmony_ci	return count;
33962306a36Sopenharmony_ci}
34062306a36Sopenharmony_ci
34162306a36Sopenharmony_cistatic void dirty_ring_wait_vcpu(void)
34262306a36Sopenharmony_ci{
34362306a36Sopenharmony_ci	/* This makes sure that hardware PML cache flushed */
34462306a36Sopenharmony_ci	vcpu_kick();
34562306a36Sopenharmony_ci	sem_wait_until(&sem_vcpu_stop);
34662306a36Sopenharmony_ci}
34762306a36Sopenharmony_ci
34862306a36Sopenharmony_cistatic void dirty_ring_continue_vcpu(void)
34962306a36Sopenharmony_ci{
35062306a36Sopenharmony_ci	pr_info("Notifying vcpu to continue\n");
35162306a36Sopenharmony_ci	sem_post(&sem_vcpu_cont);
35262306a36Sopenharmony_ci}
35362306a36Sopenharmony_ci
35462306a36Sopenharmony_cistatic void dirty_ring_collect_dirty_pages(struct kvm_vcpu *vcpu, int slot,
35562306a36Sopenharmony_ci					   void *bitmap, uint32_t num_pages,
35662306a36Sopenharmony_ci					   uint32_t *ring_buf_idx)
35762306a36Sopenharmony_ci{
35862306a36Sopenharmony_ci	uint32_t count = 0, cleared;
35962306a36Sopenharmony_ci	bool continued_vcpu = false;
36062306a36Sopenharmony_ci
36162306a36Sopenharmony_ci	dirty_ring_wait_vcpu();
36262306a36Sopenharmony_ci
36362306a36Sopenharmony_ci	if (!dirty_ring_vcpu_ring_full) {
36462306a36Sopenharmony_ci		/*
36562306a36Sopenharmony_ci		 * This is not a ring-full event, it's safe to allow
36662306a36Sopenharmony_ci		 * vcpu to continue
36762306a36Sopenharmony_ci		 */
36862306a36Sopenharmony_ci		dirty_ring_continue_vcpu();
36962306a36Sopenharmony_ci		continued_vcpu = true;
37062306a36Sopenharmony_ci	}
37162306a36Sopenharmony_ci
37262306a36Sopenharmony_ci	/* Only have one vcpu */
37362306a36Sopenharmony_ci	count = dirty_ring_collect_one(vcpu_map_dirty_ring(vcpu),
37462306a36Sopenharmony_ci				       slot, bitmap, num_pages,
37562306a36Sopenharmony_ci				       ring_buf_idx);
37662306a36Sopenharmony_ci
37762306a36Sopenharmony_ci	cleared = kvm_vm_reset_dirty_ring(vcpu->vm);
37862306a36Sopenharmony_ci
37962306a36Sopenharmony_ci	/*
38062306a36Sopenharmony_ci	 * Cleared pages should be the same as collected, as KVM is supposed to
38162306a36Sopenharmony_ci	 * clear only the entries that have been harvested.
38262306a36Sopenharmony_ci	 */
38362306a36Sopenharmony_ci	TEST_ASSERT(cleared == count, "Reset dirty pages (%u) mismatch "
38462306a36Sopenharmony_ci		    "with collected (%u)", cleared, count);
38562306a36Sopenharmony_ci
38662306a36Sopenharmony_ci	if (!continued_vcpu) {
38762306a36Sopenharmony_ci		TEST_ASSERT(dirty_ring_vcpu_ring_full,
38862306a36Sopenharmony_ci			    "Didn't continue vcpu even without ring full");
38962306a36Sopenharmony_ci		dirty_ring_continue_vcpu();
39062306a36Sopenharmony_ci	}
39162306a36Sopenharmony_ci
39262306a36Sopenharmony_ci	pr_info("Iteration %ld collected %u pages\n", iteration, count);
39362306a36Sopenharmony_ci}
39462306a36Sopenharmony_ci
39562306a36Sopenharmony_cistatic void dirty_ring_after_vcpu_run(struct kvm_vcpu *vcpu, int ret, int err)
39662306a36Sopenharmony_ci{
39762306a36Sopenharmony_ci	struct kvm_run *run = vcpu->run;
39862306a36Sopenharmony_ci
39962306a36Sopenharmony_ci	/* A ucall-sync or ring-full event is allowed */
40062306a36Sopenharmony_ci	if (get_ucall(vcpu, NULL) == UCALL_SYNC) {
40162306a36Sopenharmony_ci		/* We should allow this to continue */
40262306a36Sopenharmony_ci		;
40362306a36Sopenharmony_ci	} else if (run->exit_reason == KVM_EXIT_DIRTY_RING_FULL ||
40462306a36Sopenharmony_ci		   (ret == -1 && err == EINTR)) {
40562306a36Sopenharmony_ci		/* Update the flag first before pause */
40662306a36Sopenharmony_ci		WRITE_ONCE(dirty_ring_vcpu_ring_full,
40762306a36Sopenharmony_ci			   run->exit_reason == KVM_EXIT_DIRTY_RING_FULL);
40862306a36Sopenharmony_ci		sem_post(&sem_vcpu_stop);
40962306a36Sopenharmony_ci		pr_info("vcpu stops because %s...\n",
41062306a36Sopenharmony_ci			dirty_ring_vcpu_ring_full ?
41162306a36Sopenharmony_ci			"dirty ring is full" : "vcpu is kicked out");
41262306a36Sopenharmony_ci		sem_wait_until(&sem_vcpu_cont);
41362306a36Sopenharmony_ci		pr_info("vcpu continues now.\n");
41462306a36Sopenharmony_ci	} else {
41562306a36Sopenharmony_ci		TEST_ASSERT(false, "Invalid guest sync status: "
41662306a36Sopenharmony_ci			    "exit_reason=%s\n",
41762306a36Sopenharmony_ci			    exit_reason_str(run->exit_reason));
41862306a36Sopenharmony_ci	}
41962306a36Sopenharmony_ci}
42062306a36Sopenharmony_ci
42162306a36Sopenharmony_cistruct log_mode {
42262306a36Sopenharmony_ci	const char *name;
42362306a36Sopenharmony_ci	/* Return true if this mode is supported, otherwise false */
42462306a36Sopenharmony_ci	bool (*supported)(void);
42562306a36Sopenharmony_ci	/* Hook when the vm creation is done (before vcpu creation) */
42662306a36Sopenharmony_ci	void (*create_vm_done)(struct kvm_vm *vm);
42762306a36Sopenharmony_ci	/* Hook to collect the dirty pages into the bitmap provided */
42862306a36Sopenharmony_ci	void (*collect_dirty_pages) (struct kvm_vcpu *vcpu, int slot,
42962306a36Sopenharmony_ci				     void *bitmap, uint32_t num_pages,
43062306a36Sopenharmony_ci				     uint32_t *ring_buf_idx);
43162306a36Sopenharmony_ci	/* Hook to call when after each vcpu run */
43262306a36Sopenharmony_ci	void (*after_vcpu_run)(struct kvm_vcpu *vcpu, int ret, int err);
43362306a36Sopenharmony_ci} log_modes[LOG_MODE_NUM] = {
43462306a36Sopenharmony_ci	{
43562306a36Sopenharmony_ci		.name = "dirty-log",
43662306a36Sopenharmony_ci		.collect_dirty_pages = dirty_log_collect_dirty_pages,
43762306a36Sopenharmony_ci		.after_vcpu_run = default_after_vcpu_run,
43862306a36Sopenharmony_ci	},
43962306a36Sopenharmony_ci	{
44062306a36Sopenharmony_ci		.name = "clear-log",
44162306a36Sopenharmony_ci		.supported = clear_log_supported,
44262306a36Sopenharmony_ci		.create_vm_done = clear_log_create_vm_done,
44362306a36Sopenharmony_ci		.collect_dirty_pages = clear_log_collect_dirty_pages,
44462306a36Sopenharmony_ci		.after_vcpu_run = default_after_vcpu_run,
44562306a36Sopenharmony_ci	},
44662306a36Sopenharmony_ci	{
44762306a36Sopenharmony_ci		.name = "dirty-ring",
44862306a36Sopenharmony_ci		.supported = dirty_ring_supported,
44962306a36Sopenharmony_ci		.create_vm_done = dirty_ring_create_vm_done,
45062306a36Sopenharmony_ci		.collect_dirty_pages = dirty_ring_collect_dirty_pages,
45162306a36Sopenharmony_ci		.after_vcpu_run = dirty_ring_after_vcpu_run,
45262306a36Sopenharmony_ci	},
45362306a36Sopenharmony_ci};
45462306a36Sopenharmony_ci
/*
 * Bitmap of pages whose dirty bit must show up in the _next_ iteration.
 * For example, when a page already holds the value of the current
 * iteration while its bit is clear in the latest bitmap, the system has
 * to report that write in the following get-dirty-log call.
 */
static unsigned long *host_bmap_track;
46362306a36Sopenharmony_ci
46462306a36Sopenharmony_cistatic void log_modes_dump(void)
46562306a36Sopenharmony_ci{
46662306a36Sopenharmony_ci	int i;
46762306a36Sopenharmony_ci
46862306a36Sopenharmony_ci	printf("all");
46962306a36Sopenharmony_ci	for (i = 0; i < LOG_MODE_NUM; i++)
47062306a36Sopenharmony_ci		printf(", %s", log_modes[i].name);
47162306a36Sopenharmony_ci	printf("\n");
47262306a36Sopenharmony_ci}
47362306a36Sopenharmony_ci
47462306a36Sopenharmony_cistatic bool log_mode_supported(void)
47562306a36Sopenharmony_ci{
47662306a36Sopenharmony_ci	struct log_mode *mode = &log_modes[host_log_mode];
47762306a36Sopenharmony_ci
47862306a36Sopenharmony_ci	if (mode->supported)
47962306a36Sopenharmony_ci		return mode->supported();
48062306a36Sopenharmony_ci
48162306a36Sopenharmony_ci	return true;
48262306a36Sopenharmony_ci}
48362306a36Sopenharmony_ci
48462306a36Sopenharmony_cistatic void log_mode_create_vm_done(struct kvm_vm *vm)
48562306a36Sopenharmony_ci{
48662306a36Sopenharmony_ci	struct log_mode *mode = &log_modes[host_log_mode];
48762306a36Sopenharmony_ci
48862306a36Sopenharmony_ci	if (mode->create_vm_done)
48962306a36Sopenharmony_ci		mode->create_vm_done(vm);
49062306a36Sopenharmony_ci}
49162306a36Sopenharmony_ci
49262306a36Sopenharmony_cistatic void log_mode_collect_dirty_pages(struct kvm_vcpu *vcpu, int slot,
49362306a36Sopenharmony_ci					 void *bitmap, uint32_t num_pages,
49462306a36Sopenharmony_ci					 uint32_t *ring_buf_idx)
49562306a36Sopenharmony_ci{
49662306a36Sopenharmony_ci	struct log_mode *mode = &log_modes[host_log_mode];
49762306a36Sopenharmony_ci
49862306a36Sopenharmony_ci	TEST_ASSERT(mode->collect_dirty_pages != NULL,
49962306a36Sopenharmony_ci		    "collect_dirty_pages() is required for any log mode!");
50062306a36Sopenharmony_ci	mode->collect_dirty_pages(vcpu, slot, bitmap, num_pages, ring_buf_idx);
50162306a36Sopenharmony_ci}
50262306a36Sopenharmony_ci
50362306a36Sopenharmony_cistatic void log_mode_after_vcpu_run(struct kvm_vcpu *vcpu, int ret, int err)
50462306a36Sopenharmony_ci{
50562306a36Sopenharmony_ci	struct log_mode *mode = &log_modes[host_log_mode];
50662306a36Sopenharmony_ci
50762306a36Sopenharmony_ci	if (mode->after_vcpu_run)
50862306a36Sopenharmony_ci		mode->after_vcpu_run(vcpu, ret, err);
50962306a36Sopenharmony_ci}
51062306a36Sopenharmony_ci
/* Fill the first @size elements of @guest_array with values from random(). */
static void generate_random_array(uint64_t *guest_array, uint64_t size)
{
	uint64_t *end = guest_array + size;
	uint64_t *slot;

	for (slot = guest_array; slot != end; slot++)
		*slot = random();
}
51862306a36Sopenharmony_ci
51962306a36Sopenharmony_cistatic void *vcpu_worker(void *data)
52062306a36Sopenharmony_ci{
52162306a36Sopenharmony_ci	int ret;
52262306a36Sopenharmony_ci	struct kvm_vcpu *vcpu = data;
52362306a36Sopenharmony_ci	struct kvm_vm *vm = vcpu->vm;
52462306a36Sopenharmony_ci	uint64_t *guest_array;
52562306a36Sopenharmony_ci	uint64_t pages_count = 0;
52662306a36Sopenharmony_ci	struct kvm_signal_mask *sigmask = alloca(offsetof(struct kvm_signal_mask, sigset)
52762306a36Sopenharmony_ci						 + sizeof(sigset_t));
52862306a36Sopenharmony_ci	sigset_t *sigset = (sigset_t *) &sigmask->sigset;
52962306a36Sopenharmony_ci
53062306a36Sopenharmony_ci	/*
53162306a36Sopenharmony_ci	 * SIG_IPI is unblocked atomically while in KVM_RUN.  It causes the
53262306a36Sopenharmony_ci	 * ioctl to return with -EINTR, but it is still pending and we need
53362306a36Sopenharmony_ci	 * to accept it with the sigwait.
53462306a36Sopenharmony_ci	 */
53562306a36Sopenharmony_ci	sigmask->len = 8;
53662306a36Sopenharmony_ci	pthread_sigmask(0, NULL, sigset);
53762306a36Sopenharmony_ci	sigdelset(sigset, SIG_IPI);
53862306a36Sopenharmony_ci	vcpu_ioctl(vcpu, KVM_SET_SIGNAL_MASK, sigmask);
53962306a36Sopenharmony_ci
54062306a36Sopenharmony_ci	sigemptyset(sigset);
54162306a36Sopenharmony_ci	sigaddset(sigset, SIG_IPI);
54262306a36Sopenharmony_ci
54362306a36Sopenharmony_ci	guest_array = addr_gva2hva(vm, (vm_vaddr_t)random_array);
54462306a36Sopenharmony_ci
54562306a36Sopenharmony_ci	while (!READ_ONCE(host_quit)) {
54662306a36Sopenharmony_ci		/* Clear any existing kick signals */
54762306a36Sopenharmony_ci		generate_random_array(guest_array, TEST_PAGES_PER_LOOP);
54862306a36Sopenharmony_ci		pages_count += TEST_PAGES_PER_LOOP;
54962306a36Sopenharmony_ci		/* Let the guest dirty the random pages */
55062306a36Sopenharmony_ci		ret = __vcpu_run(vcpu);
55162306a36Sopenharmony_ci		if (ret == -1 && errno == EINTR) {
55262306a36Sopenharmony_ci			int sig = -1;
55362306a36Sopenharmony_ci			sigwait(sigset, &sig);
55462306a36Sopenharmony_ci			assert(sig == SIG_IPI);
55562306a36Sopenharmony_ci		}
55662306a36Sopenharmony_ci		log_mode_after_vcpu_run(vcpu, ret, errno);
55762306a36Sopenharmony_ci	}
55862306a36Sopenharmony_ci
55962306a36Sopenharmony_ci	pr_info("Dirtied %"PRIu64" pages\n", pages_count);
56062306a36Sopenharmony_ci
56162306a36Sopenharmony_ci	return NULL;
56262306a36Sopenharmony_ci}
56362306a36Sopenharmony_ci
56462306a36Sopenharmony_cistatic void vm_dirty_log_verify(enum vm_guest_mode mode, unsigned long *bmap)
56562306a36Sopenharmony_ci{
56662306a36Sopenharmony_ci	uint64_t step = vm_num_host_pages(mode, 1);
56762306a36Sopenharmony_ci	uint64_t page;
56862306a36Sopenharmony_ci	uint64_t *value_ptr;
56962306a36Sopenharmony_ci	uint64_t min_iter = 0;
57062306a36Sopenharmony_ci
57162306a36Sopenharmony_ci	for (page = 0; page < host_num_pages; page += step) {
57262306a36Sopenharmony_ci		value_ptr = host_test_mem + page * host_page_size;
57362306a36Sopenharmony_ci
57462306a36Sopenharmony_ci		/* If this is a special page that we were tracking... */
57562306a36Sopenharmony_ci		if (__test_and_clear_bit_le(page, host_bmap_track)) {
57662306a36Sopenharmony_ci			host_track_next_count++;
57762306a36Sopenharmony_ci			TEST_ASSERT(test_bit_le(page, bmap),
57862306a36Sopenharmony_ci				    "Page %"PRIu64" should have its dirty bit "
57962306a36Sopenharmony_ci				    "set in this iteration but it is missing",
58062306a36Sopenharmony_ci				    page);
58162306a36Sopenharmony_ci		}
58262306a36Sopenharmony_ci
58362306a36Sopenharmony_ci		if (__test_and_clear_bit_le(page, bmap)) {
58462306a36Sopenharmony_ci			bool matched;
58562306a36Sopenharmony_ci
58662306a36Sopenharmony_ci			host_dirty_count++;
58762306a36Sopenharmony_ci
58862306a36Sopenharmony_ci			/*
58962306a36Sopenharmony_ci			 * If the bit is set, the value written onto
59062306a36Sopenharmony_ci			 * the corresponding page should be either the
59162306a36Sopenharmony_ci			 * previous iteration number or the current one.
59262306a36Sopenharmony_ci			 */
59362306a36Sopenharmony_ci			matched = (*value_ptr == iteration ||
59462306a36Sopenharmony_ci				   *value_ptr == iteration - 1);
59562306a36Sopenharmony_ci
59662306a36Sopenharmony_ci			if (host_log_mode == LOG_MODE_DIRTY_RING && !matched) {
59762306a36Sopenharmony_ci				if (*value_ptr == iteration - 2 && min_iter <= iteration - 2) {
59862306a36Sopenharmony_ci					/*
59962306a36Sopenharmony_ci					 * Short answer: this case is special
60062306a36Sopenharmony_ci					 * only for dirty ring test where the
60162306a36Sopenharmony_ci					 * page is the last page before a kvm
60262306a36Sopenharmony_ci					 * dirty ring full in iteration N-2.
60362306a36Sopenharmony_ci					 *
60462306a36Sopenharmony_ci					 * Long answer: Assuming ring size R,
60562306a36Sopenharmony_ci					 * one possible condition is:
60662306a36Sopenharmony_ci					 *
60762306a36Sopenharmony_ci					 *      main thr       vcpu thr
60862306a36Sopenharmony_ci					 *      --------       --------
60962306a36Sopenharmony_ci					 *    iter=1
61062306a36Sopenharmony_ci					 *                   write 1 to page 0~(R-1)
61162306a36Sopenharmony_ci					 *                   full, vmexit
61262306a36Sopenharmony_ci					 *    collect 0~(R-1)
61362306a36Sopenharmony_ci					 *    kick vcpu
61462306a36Sopenharmony_ci					 *                   write 1 to (R-1)~(2R-2)
61562306a36Sopenharmony_ci					 *                   full, vmexit
61662306a36Sopenharmony_ci					 *    iter=2
61762306a36Sopenharmony_ci					 *    collect (R-1)~(2R-2)
61862306a36Sopenharmony_ci					 *    kick vcpu
61962306a36Sopenharmony_ci					 *                   write 1 to (2R-2)
62062306a36Sopenharmony_ci					 *                   (NOTE!!! "1" cached in cpu reg)
62162306a36Sopenharmony_ci					 *                   write 2 to (2R-1)~(3R-3)
62262306a36Sopenharmony_ci					 *                   full, vmexit
62362306a36Sopenharmony_ci					 *    iter=3
62462306a36Sopenharmony_ci					 *    collect (2R-2)~(3R-3)
62562306a36Sopenharmony_ci					 *    (here if we read value on page
62662306a36Sopenharmony_ci					 *     "2R-2" is 1, while iter=3!!!)
62762306a36Sopenharmony_ci					 *
62862306a36Sopenharmony_ci					 * This however can only happen once per iteration.
62962306a36Sopenharmony_ci					 */
63062306a36Sopenharmony_ci					min_iter = iteration - 1;
63162306a36Sopenharmony_ci					continue;
63262306a36Sopenharmony_ci				} else if (page == dirty_ring_last_page) {
63362306a36Sopenharmony_ci					/*
63462306a36Sopenharmony_ci					 * Please refer to comments in
63562306a36Sopenharmony_ci					 * dirty_ring_last_page.
63662306a36Sopenharmony_ci					 */
63762306a36Sopenharmony_ci					continue;
63862306a36Sopenharmony_ci				}
63962306a36Sopenharmony_ci			}
64062306a36Sopenharmony_ci
64162306a36Sopenharmony_ci			TEST_ASSERT(matched,
64262306a36Sopenharmony_ci				    "Set page %"PRIu64" value %"PRIu64
64362306a36Sopenharmony_ci				    " incorrect (iteration=%"PRIu64")",
64462306a36Sopenharmony_ci				    page, *value_ptr, iteration);
64562306a36Sopenharmony_ci		} else {
64662306a36Sopenharmony_ci			host_clear_count++;
64762306a36Sopenharmony_ci			/*
64862306a36Sopenharmony_ci			 * If cleared, the value written can be any
64962306a36Sopenharmony_ci			 * value smaller or equals to the iteration
65062306a36Sopenharmony_ci			 * number.  Note that the value can be exactly
65162306a36Sopenharmony_ci			 * (iteration-1) if that write can happen
65262306a36Sopenharmony_ci			 * like this:
65362306a36Sopenharmony_ci			 *
65462306a36Sopenharmony_ci			 * (1) increase loop count to "iteration-1"
65562306a36Sopenharmony_ci			 * (2) write to page P happens (with value
65662306a36Sopenharmony_ci			 *     "iteration-1")
65762306a36Sopenharmony_ci			 * (3) get dirty log for "iteration-1"; we'll
65862306a36Sopenharmony_ci			 *     see that page P bit is set (dirtied),
65962306a36Sopenharmony_ci			 *     and not set the bit in host_bmap_track
66062306a36Sopenharmony_ci			 * (4) increase loop count to "iteration"
66162306a36Sopenharmony_ci			 *     (which is current iteration)
66262306a36Sopenharmony_ci			 * (5) get dirty log for current iteration,
66362306a36Sopenharmony_ci			 *     we'll see that page P is cleared, with
66462306a36Sopenharmony_ci			 *     value "iteration-1".
66562306a36Sopenharmony_ci			 */
66662306a36Sopenharmony_ci			TEST_ASSERT(*value_ptr <= iteration,
66762306a36Sopenharmony_ci				    "Clear page %"PRIu64" value %"PRIu64
66862306a36Sopenharmony_ci				    " incorrect (iteration=%"PRIu64")",
66962306a36Sopenharmony_ci				    page, *value_ptr, iteration);
67062306a36Sopenharmony_ci			if (*value_ptr == iteration) {
67162306a36Sopenharmony_ci				/*
67262306a36Sopenharmony_ci				 * This page is _just_ modified; it
67362306a36Sopenharmony_ci				 * should report its dirtyness in the
67462306a36Sopenharmony_ci				 * next run
67562306a36Sopenharmony_ci				 */
67662306a36Sopenharmony_ci				__set_bit_le(page, host_bmap_track);
67762306a36Sopenharmony_ci			}
67862306a36Sopenharmony_ci		}
67962306a36Sopenharmony_ci	}
68062306a36Sopenharmony_ci}
68162306a36Sopenharmony_ci
68262306a36Sopenharmony_cistatic struct kvm_vm *create_vm(enum vm_guest_mode mode, struct kvm_vcpu **vcpu,
68362306a36Sopenharmony_ci				uint64_t extra_mem_pages, void *guest_code)
68462306a36Sopenharmony_ci{
68562306a36Sopenharmony_ci	struct kvm_vm *vm;
68662306a36Sopenharmony_ci
68762306a36Sopenharmony_ci	pr_info("Testing guest mode: %s\n", vm_guest_mode_string(mode));
68862306a36Sopenharmony_ci
68962306a36Sopenharmony_ci	vm = __vm_create(mode, 1, extra_mem_pages);
69062306a36Sopenharmony_ci
69162306a36Sopenharmony_ci	log_mode_create_vm_done(vm);
69262306a36Sopenharmony_ci	*vcpu = vm_vcpu_add(vm, 0, guest_code);
69362306a36Sopenharmony_ci	return vm;
69462306a36Sopenharmony_ci}
69562306a36Sopenharmony_ci
/* Command-line tunables handed to run_test() through its void *arg. */
struct test_params {
	/* Bound of the host loop, which runs while iteration < iterations */
	unsigned long iterations;
	/* Delay before each dirty-log collection, in milliseconds */
	unsigned long interval;
	/* Guest physical address of the test memory; 0 lets run_test() choose */
	uint64_t phys_offset;
};
70162306a36Sopenharmony_ci
70262306a36Sopenharmony_cistatic void run_test(enum vm_guest_mode mode, void *arg)
70362306a36Sopenharmony_ci{
70462306a36Sopenharmony_ci	struct test_params *p = arg;
70562306a36Sopenharmony_ci	struct kvm_vcpu *vcpu;
70662306a36Sopenharmony_ci	struct kvm_vm *vm;
70762306a36Sopenharmony_ci	unsigned long *bmap;
70862306a36Sopenharmony_ci	uint32_t ring_buf_idx = 0;
70962306a36Sopenharmony_ci	int sem_val;
71062306a36Sopenharmony_ci
71162306a36Sopenharmony_ci	if (!log_mode_supported()) {
71262306a36Sopenharmony_ci		print_skip("Log mode '%s' not supported",
71362306a36Sopenharmony_ci			   log_modes[host_log_mode].name);
71462306a36Sopenharmony_ci		return;
71562306a36Sopenharmony_ci	}
71662306a36Sopenharmony_ci
71762306a36Sopenharmony_ci	/*
71862306a36Sopenharmony_ci	 * We reserve page table for 2 times of extra dirty mem which
71962306a36Sopenharmony_ci	 * will definitely cover the original (1G+) test range.  Here
72062306a36Sopenharmony_ci	 * we do the calculation with 4K page size which is the
72162306a36Sopenharmony_ci	 * smallest so the page number will be enough for all archs
72262306a36Sopenharmony_ci	 * (e.g., 64K page size guest will need even less memory for
72362306a36Sopenharmony_ci	 * page tables).
72462306a36Sopenharmony_ci	 */
72562306a36Sopenharmony_ci	vm = create_vm(mode, &vcpu,
72662306a36Sopenharmony_ci		       2ul << (DIRTY_MEM_BITS - PAGE_SHIFT_4K), guest_code);
72762306a36Sopenharmony_ci
72862306a36Sopenharmony_ci	guest_page_size = vm->page_size;
72962306a36Sopenharmony_ci	/*
73062306a36Sopenharmony_ci	 * A little more than 1G of guest page sized pages.  Cover the
73162306a36Sopenharmony_ci	 * case where the size is not aligned to 64 pages.
73262306a36Sopenharmony_ci	 */
73362306a36Sopenharmony_ci	guest_num_pages = (1ul << (DIRTY_MEM_BITS - vm->page_shift)) + 3;
73462306a36Sopenharmony_ci	guest_num_pages = vm_adjust_num_guest_pages(mode, guest_num_pages);
73562306a36Sopenharmony_ci
73662306a36Sopenharmony_ci	host_page_size = getpagesize();
73762306a36Sopenharmony_ci	host_num_pages = vm_num_host_pages(mode, guest_num_pages);
73862306a36Sopenharmony_ci
73962306a36Sopenharmony_ci	if (!p->phys_offset) {
74062306a36Sopenharmony_ci		guest_test_phys_mem = (vm->max_gfn - guest_num_pages) *
74162306a36Sopenharmony_ci				      guest_page_size;
74262306a36Sopenharmony_ci		guest_test_phys_mem = align_down(guest_test_phys_mem, host_page_size);
74362306a36Sopenharmony_ci	} else {
74462306a36Sopenharmony_ci		guest_test_phys_mem = p->phys_offset;
74562306a36Sopenharmony_ci	}
74662306a36Sopenharmony_ci
74762306a36Sopenharmony_ci#ifdef __s390x__
74862306a36Sopenharmony_ci	/* Align to 1M (segment size) */
74962306a36Sopenharmony_ci	guest_test_phys_mem = align_down(guest_test_phys_mem, 1 << 20);
75062306a36Sopenharmony_ci#endif
75162306a36Sopenharmony_ci
75262306a36Sopenharmony_ci	pr_info("guest physical test memory offset: 0x%lx\n", guest_test_phys_mem);
75362306a36Sopenharmony_ci
75462306a36Sopenharmony_ci	bmap = bitmap_zalloc(host_num_pages);
75562306a36Sopenharmony_ci	host_bmap_track = bitmap_zalloc(host_num_pages);
75662306a36Sopenharmony_ci
75762306a36Sopenharmony_ci	/* Add an extra memory slot for testing dirty logging */
75862306a36Sopenharmony_ci	vm_userspace_mem_region_add(vm, VM_MEM_SRC_ANONYMOUS,
75962306a36Sopenharmony_ci				    guest_test_phys_mem,
76062306a36Sopenharmony_ci				    TEST_MEM_SLOT_INDEX,
76162306a36Sopenharmony_ci				    guest_num_pages,
76262306a36Sopenharmony_ci				    KVM_MEM_LOG_DIRTY_PAGES);
76362306a36Sopenharmony_ci
76462306a36Sopenharmony_ci	/* Do mapping for the dirty track memory slot */
76562306a36Sopenharmony_ci	virt_map(vm, guest_test_virt_mem, guest_test_phys_mem, guest_num_pages);
76662306a36Sopenharmony_ci
76762306a36Sopenharmony_ci	/* Cache the HVA pointer of the region */
76862306a36Sopenharmony_ci	host_test_mem = addr_gpa2hva(vm, (vm_paddr_t)guest_test_phys_mem);
76962306a36Sopenharmony_ci
77062306a36Sopenharmony_ci	/* Export the shared variables to the guest */
77162306a36Sopenharmony_ci	sync_global_to_guest(vm, host_page_size);
77262306a36Sopenharmony_ci	sync_global_to_guest(vm, guest_page_size);
77362306a36Sopenharmony_ci	sync_global_to_guest(vm, guest_test_virt_mem);
77462306a36Sopenharmony_ci	sync_global_to_guest(vm, guest_num_pages);
77562306a36Sopenharmony_ci
77662306a36Sopenharmony_ci	/* Start the iterations */
77762306a36Sopenharmony_ci	iteration = 1;
77862306a36Sopenharmony_ci	sync_global_to_guest(vm, iteration);
77962306a36Sopenharmony_ci	WRITE_ONCE(host_quit, false);
78062306a36Sopenharmony_ci	host_dirty_count = 0;
78162306a36Sopenharmony_ci	host_clear_count = 0;
78262306a36Sopenharmony_ci	host_track_next_count = 0;
78362306a36Sopenharmony_ci	WRITE_ONCE(dirty_ring_vcpu_ring_full, false);
78462306a36Sopenharmony_ci
78562306a36Sopenharmony_ci	/*
78662306a36Sopenharmony_ci	 * Ensure the previous iteration didn't leave a dangling semaphore, i.e.
78762306a36Sopenharmony_ci	 * that the main task and vCPU worker were synchronized and completed
78862306a36Sopenharmony_ci	 * verification of all iterations.
78962306a36Sopenharmony_ci	 */
79062306a36Sopenharmony_ci	sem_getvalue(&sem_vcpu_stop, &sem_val);
79162306a36Sopenharmony_ci	TEST_ASSERT_EQ(sem_val, 0);
79262306a36Sopenharmony_ci	sem_getvalue(&sem_vcpu_cont, &sem_val);
79362306a36Sopenharmony_ci	TEST_ASSERT_EQ(sem_val, 0);
79462306a36Sopenharmony_ci
79562306a36Sopenharmony_ci	pthread_create(&vcpu_thread, NULL, vcpu_worker, vcpu);
79662306a36Sopenharmony_ci
79762306a36Sopenharmony_ci	while (iteration < p->iterations) {
79862306a36Sopenharmony_ci		/* Give the vcpu thread some time to dirty some pages */
79962306a36Sopenharmony_ci		usleep(p->interval * 1000);
80062306a36Sopenharmony_ci		log_mode_collect_dirty_pages(vcpu, TEST_MEM_SLOT_INDEX,
80162306a36Sopenharmony_ci					     bmap, host_num_pages,
80262306a36Sopenharmony_ci					     &ring_buf_idx);
80362306a36Sopenharmony_ci
80462306a36Sopenharmony_ci		/*
80562306a36Sopenharmony_ci		 * See vcpu_sync_stop_requested definition for details on why
80662306a36Sopenharmony_ci		 * we need to stop vcpu when verify data.
80762306a36Sopenharmony_ci		 */
80862306a36Sopenharmony_ci		atomic_set(&vcpu_sync_stop_requested, true);
80962306a36Sopenharmony_ci		sem_wait_until(&sem_vcpu_stop);
81062306a36Sopenharmony_ci		/*
81162306a36Sopenharmony_ci		 * NOTE: for dirty ring, it's possible that we didn't stop at
81262306a36Sopenharmony_ci		 * GUEST_SYNC but instead we stopped because ring is full;
81362306a36Sopenharmony_ci		 * that's okay too because ring full means we're only missing
81462306a36Sopenharmony_ci		 * the flush of the last page, and since we handle the last
81562306a36Sopenharmony_ci		 * page specially verification will succeed anyway.
81662306a36Sopenharmony_ci		 */
81762306a36Sopenharmony_ci		assert(host_log_mode == LOG_MODE_DIRTY_RING ||
81862306a36Sopenharmony_ci		       atomic_read(&vcpu_sync_stop_requested) == false);
81962306a36Sopenharmony_ci		vm_dirty_log_verify(mode, bmap);
82062306a36Sopenharmony_ci
82162306a36Sopenharmony_ci		/*
82262306a36Sopenharmony_ci		 * Set host_quit before sem_vcpu_cont in the final iteration to
82362306a36Sopenharmony_ci		 * ensure that the vCPU worker doesn't resume the guest.  As
82462306a36Sopenharmony_ci		 * above, the dirty ring test may stop and wait even when not
82562306a36Sopenharmony_ci		 * explicitly request to do so, i.e. would hang waiting for a
82662306a36Sopenharmony_ci		 * "continue" if it's allowed to resume the guest.
82762306a36Sopenharmony_ci		 */
82862306a36Sopenharmony_ci		if (++iteration == p->iterations)
82962306a36Sopenharmony_ci			WRITE_ONCE(host_quit, true);
83062306a36Sopenharmony_ci
83162306a36Sopenharmony_ci		sem_post(&sem_vcpu_cont);
83262306a36Sopenharmony_ci		sync_global_to_guest(vm, iteration);
83362306a36Sopenharmony_ci	}
83462306a36Sopenharmony_ci
83562306a36Sopenharmony_ci	pthread_join(vcpu_thread, NULL);
83662306a36Sopenharmony_ci
83762306a36Sopenharmony_ci	pr_info("Total bits checked: dirty (%"PRIu64"), clear (%"PRIu64"), "
83862306a36Sopenharmony_ci		"track_next (%"PRIu64")\n", host_dirty_count, host_clear_count,
83962306a36Sopenharmony_ci		host_track_next_count);
84062306a36Sopenharmony_ci
84162306a36Sopenharmony_ci	free(bmap);
84262306a36Sopenharmony_ci	free(host_bmap_track);
84362306a36Sopenharmony_ci	kvm_vm_free(vm);
84462306a36Sopenharmony_ci}
84562306a36Sopenharmony_ci
84662306a36Sopenharmony_cistatic void help(char *name)
84762306a36Sopenharmony_ci{
84862306a36Sopenharmony_ci	puts("");
84962306a36Sopenharmony_ci	printf("usage: %s [-h] [-i iterations] [-I interval] "
85062306a36Sopenharmony_ci	       "[-p offset] [-m mode]\n", name);
85162306a36Sopenharmony_ci	puts("");
85262306a36Sopenharmony_ci	printf(" -c: hint to dirty ring size, in number of entries\n");
85362306a36Sopenharmony_ci	printf("     (only useful for dirty-ring test; default: %"PRIu32")\n",
85462306a36Sopenharmony_ci	       TEST_DIRTY_RING_COUNT);
85562306a36Sopenharmony_ci	printf(" -i: specify iteration counts (default: %"PRIu64")\n",
85662306a36Sopenharmony_ci	       TEST_HOST_LOOP_N);
85762306a36Sopenharmony_ci	printf(" -I: specify interval in ms (default: %"PRIu64" ms)\n",
85862306a36Sopenharmony_ci	       TEST_HOST_LOOP_INTERVAL);
85962306a36Sopenharmony_ci	printf(" -p: specify guest physical test memory offset\n"
86062306a36Sopenharmony_ci	       "     Warning: a low offset can conflict with the loaded test code.\n");
86162306a36Sopenharmony_ci	printf(" -M: specify the host logging mode "
86262306a36Sopenharmony_ci	       "(default: run all log modes).  Supported modes: \n\t");
86362306a36Sopenharmony_ci	log_modes_dump();
86462306a36Sopenharmony_ci	guest_modes_help();
86562306a36Sopenharmony_ci	puts("");
86662306a36Sopenharmony_ci	exit(0);
86762306a36Sopenharmony_ci}
86862306a36Sopenharmony_ci
86962306a36Sopenharmony_ciint main(int argc, char *argv[])
87062306a36Sopenharmony_ci{
87162306a36Sopenharmony_ci	struct test_params p = {
87262306a36Sopenharmony_ci		.iterations = TEST_HOST_LOOP_N,
87362306a36Sopenharmony_ci		.interval = TEST_HOST_LOOP_INTERVAL,
87462306a36Sopenharmony_ci	};
87562306a36Sopenharmony_ci	int opt, i;
87662306a36Sopenharmony_ci	sigset_t sigset;
87762306a36Sopenharmony_ci
87862306a36Sopenharmony_ci	sem_init(&sem_vcpu_stop, 0, 0);
87962306a36Sopenharmony_ci	sem_init(&sem_vcpu_cont, 0, 0);
88062306a36Sopenharmony_ci
88162306a36Sopenharmony_ci	guest_modes_append_default();
88262306a36Sopenharmony_ci
88362306a36Sopenharmony_ci	while ((opt = getopt(argc, argv, "c:hi:I:p:m:M:")) != -1) {
88462306a36Sopenharmony_ci		switch (opt) {
88562306a36Sopenharmony_ci		case 'c':
88662306a36Sopenharmony_ci			test_dirty_ring_count = strtol(optarg, NULL, 10);
88762306a36Sopenharmony_ci			break;
88862306a36Sopenharmony_ci		case 'i':
88962306a36Sopenharmony_ci			p.iterations = strtol(optarg, NULL, 10);
89062306a36Sopenharmony_ci			break;
89162306a36Sopenharmony_ci		case 'I':
89262306a36Sopenharmony_ci			p.interval = strtol(optarg, NULL, 10);
89362306a36Sopenharmony_ci			break;
89462306a36Sopenharmony_ci		case 'p':
89562306a36Sopenharmony_ci			p.phys_offset = strtoull(optarg, NULL, 0);
89662306a36Sopenharmony_ci			break;
89762306a36Sopenharmony_ci		case 'm':
89862306a36Sopenharmony_ci			guest_modes_cmdline(optarg);
89962306a36Sopenharmony_ci			break;
90062306a36Sopenharmony_ci		case 'M':
90162306a36Sopenharmony_ci			if (!strcmp(optarg, "all")) {
90262306a36Sopenharmony_ci				host_log_mode_option = LOG_MODE_ALL;
90362306a36Sopenharmony_ci				break;
90462306a36Sopenharmony_ci			}
90562306a36Sopenharmony_ci			for (i = 0; i < LOG_MODE_NUM; i++) {
90662306a36Sopenharmony_ci				if (!strcmp(optarg, log_modes[i].name)) {
90762306a36Sopenharmony_ci					pr_info("Setting log mode to: '%s'\n",
90862306a36Sopenharmony_ci						optarg);
90962306a36Sopenharmony_ci					host_log_mode_option = i;
91062306a36Sopenharmony_ci					break;
91162306a36Sopenharmony_ci				}
91262306a36Sopenharmony_ci			}
91362306a36Sopenharmony_ci			if (i == LOG_MODE_NUM) {
91462306a36Sopenharmony_ci				printf("Log mode '%s' invalid. Please choose "
91562306a36Sopenharmony_ci				       "from: ", optarg);
91662306a36Sopenharmony_ci				log_modes_dump();
91762306a36Sopenharmony_ci				exit(1);
91862306a36Sopenharmony_ci			}
91962306a36Sopenharmony_ci			break;
92062306a36Sopenharmony_ci		case 'h':
92162306a36Sopenharmony_ci		default:
92262306a36Sopenharmony_ci			help(argv[0]);
92362306a36Sopenharmony_ci			break;
92462306a36Sopenharmony_ci		}
92562306a36Sopenharmony_ci	}
92662306a36Sopenharmony_ci
92762306a36Sopenharmony_ci	TEST_ASSERT(p.iterations > 2, "Iterations must be greater than two");
92862306a36Sopenharmony_ci	TEST_ASSERT(p.interval > 0, "Interval must be greater than zero");
92962306a36Sopenharmony_ci
93062306a36Sopenharmony_ci	pr_info("Test iterations: %"PRIu64", interval: %"PRIu64" (ms)\n",
93162306a36Sopenharmony_ci		p.iterations, p.interval);
93262306a36Sopenharmony_ci
93362306a36Sopenharmony_ci	srandom(time(0));
93462306a36Sopenharmony_ci
93562306a36Sopenharmony_ci	/* Ensure that vCPU threads start with SIG_IPI blocked.  */
93662306a36Sopenharmony_ci	sigemptyset(&sigset);
93762306a36Sopenharmony_ci	sigaddset(&sigset, SIG_IPI);
93862306a36Sopenharmony_ci	pthread_sigmask(SIG_BLOCK, &sigset, NULL);
93962306a36Sopenharmony_ci
94062306a36Sopenharmony_ci	if (host_log_mode_option == LOG_MODE_ALL) {
94162306a36Sopenharmony_ci		/* Run each log mode */
94262306a36Sopenharmony_ci		for (i = 0; i < LOG_MODE_NUM; i++) {
94362306a36Sopenharmony_ci			pr_info("Testing Log Mode '%s'\n", log_modes[i].name);
94462306a36Sopenharmony_ci			host_log_mode = i;
94562306a36Sopenharmony_ci			for_each_guest_mode(run_test, &p);
94662306a36Sopenharmony_ci		}
94762306a36Sopenharmony_ci	} else {
94862306a36Sopenharmony_ci		host_log_mode = host_log_mode_option;
94962306a36Sopenharmony_ci		for_each_guest_mode(run_test, &p);
95062306a36Sopenharmony_ci	}
95162306a36Sopenharmony_ci
95262306a36Sopenharmony_ci	return 0;
95362306a36Sopenharmony_ci}
954