1// SPDX-License-Identifier: GPL-2.0-only
2/*
3 * tools/testing/selftests/kvm/lib/kvm_util.c
4 *
5 * Copyright (C) 2018, Google LLC.
6 */
7
8#include "test_util.h"
9#include "kvm_util.h"
10#include "kvm_util_internal.h"
11#include "processor.h"
12
13#include <assert.h>
14#include <sys/mman.h>
15#include <sys/types.h>
16#include <sys/stat.h>
17#include <unistd.h>
18#include <linux/kernel.h>
19
20#define KVM_UTIL_PGS_PER_HUGEPG 512
21#define KVM_UTIL_MIN_PFN	2
22
/*
 * Aligns x up to the next multiple of size.  Size must be a power of 2.
 *
 * The power-of-2 check runs before the mask is used, so a bogus size
 * (0 or a non-power-of-2) is reported instead of silently producing a
 * wrong alignment.
 */
static void *align(void *x, size_t size)
{
	size_t mask = size - 1;

	/* %zu is the correct conversion for size_t (%lu breaks on LLP64). */
	TEST_ASSERT(size != 0 && !(size & (size - 1)),
		    "size not a power of 2: %zu", size);
	return (void *) (((size_t) x + mask) & ~mask);
}
31
32/*
33 * Capability
34 *
35 * Input Args:
36 *   cap - Capability
37 *
38 * Output Args: None
39 *
40 * Return:
41 *   On success, the Value corresponding to the capability (KVM_CAP_*)
42 *   specified by the value of cap.  On failure a TEST_ASSERT failure
43 *   is produced.
44 *
45 * Looks up and returns the value corresponding to the capability
46 * (KVM_CAP_*) given by cap.
47 */
48int kvm_check_cap(long cap)
49{
50	int ret;
51	int kvm_fd;
52
53	kvm_fd = open(KVM_DEV_PATH, O_RDONLY);
54	if (kvm_fd < 0)
55		exit(KSFT_SKIP);
56
57	ret = ioctl(kvm_fd, KVM_CHECK_EXTENSION, cap);
58	TEST_ASSERT(ret >= 0, "KVM_CHECK_EXTENSION IOCTL failed,\n"
59		"  rc: %i errno: %i", ret, errno);
60
61	close(kvm_fd);
62
63	return ret;
64}
65
66/* VM Enable Capability
67 *
68 * Input Args:
69 *   vm - Virtual Machine
70 *   cap - Capability
71 *
72 * Output Args: None
73 *
74 * Return: On success, 0. On failure a TEST_ASSERT failure is produced.
75 *
76 * Enables a capability (KVM_CAP_*) on the VM.
77 */
78int vm_enable_cap(struct kvm_vm *vm, struct kvm_enable_cap *cap)
79{
80	int ret;
81
82	ret = ioctl(vm->fd, KVM_ENABLE_CAP, cap);
83	TEST_ASSERT(ret == 0, "KVM_ENABLE_CAP IOCTL failed,\n"
84		"  rc: %i errno: %i", ret, errno);
85
86	return ret;
87}
88
89/* VCPU Enable Capability
90 *
91 * Input Args:
92 *   vm - Virtual Machine
93 *   vcpu_id - VCPU
94 *   cap - Capability
95 *
96 * Output Args: None
97 *
98 * Return: On success, 0. On failure a TEST_ASSERT failure is produced.
99 *
100 * Enables a capability (KVM_CAP_*) on the VCPU.
101 */
102int vcpu_enable_cap(struct kvm_vm *vm, uint32_t vcpu_id,
103		    struct kvm_enable_cap *cap)
104{
105	struct vcpu *vcpu = vcpu_find(vm, vcpu_id);
106	int r;
107
108	TEST_ASSERT(vcpu, "cannot find vcpu %d", vcpu_id);
109
110	r = ioctl(vcpu->fd, KVM_ENABLE_CAP, cap);
111	TEST_ASSERT(!r, "KVM_ENABLE_CAP vCPU ioctl failed,\n"
112			"  rc: %i, errno: %i", r, errno);
113
114	return r;
115}
116
/*
 * Opens /dev/kvm with the permissions given by perm and creates the
 * underlying VM fd (KVM_CREATE_VM with vm->type).  Skips the whole test
 * if /dev/kvm is unavailable or KVM_CAP_IMMEDIATE_EXIT is missing,
 * since the harness relies on immediate-exit support.
 */
static void vm_open(struct kvm_vm *vm, int perm)
{
	vm->kvm_fd = open(KVM_DEV_PATH, perm);
	if (vm->kvm_fd < 0)
		exit(KSFT_SKIP);

	/* Hard requirement for the selftest infrastructure. */
	if (!kvm_check_cap(KVM_CAP_IMMEDIATE_EXIT)) {
		print_skip("immediate_exit not available");
		exit(KSFT_SKIP);
	}

	vm->fd = ioctl(vm->kvm_fd, KVM_CREATE_VM, vm->type);
	TEST_ASSERT(vm->fd >= 0, "KVM_CREATE_VM ioctl failed, "
		"rc: %i errno: %i", vm->fd, errno);
}
132
/*
 * Human-readable description for each guest mode, indexed by
 * enum vm_guest_mode.  Keep ordering in sync with the enum; the
 * _Static_assert below catches a missing entry when a mode is added.
 */
const char * const vm_guest_mode_string[] = {
	"PA-bits:52,  VA-bits:48,  4K pages",
	"PA-bits:52,  VA-bits:48, 64K pages",
	"PA-bits:48,  VA-bits:48,  4K pages",
	"PA-bits:48,  VA-bits:48, 64K pages",
	"PA-bits:40,  VA-bits:48,  4K pages",
	"PA-bits:40,  VA-bits:48, 64K pages",
	"PA-bits:ANY, VA-bits:48,  4K pages",
};
_Static_assert(sizeof(vm_guest_mode_string)/sizeof(char *) == NUM_VM_MODES,
	       "Missing new mode strings?");
144
/* Per-mode geometry, indexed by enum vm_guest_mode. */
struct vm_guest_mode_params {
	unsigned int pa_bits;	/* guest physical address width */
	unsigned int va_bits;	/* guest virtual address width */
	unsigned int page_size;	/* guest page size in bytes */
	unsigned int page_shift;	/* log2(page_size) */
};

/*
 * Keep ordering in sync with enum vm_guest_mode.  The PXXV48 entry
 * uses 0 PA/VA bits; vm_create() fills them in from CPUID on x86.
 */
static const struct vm_guest_mode_params vm_guest_mode_params[] = {
	{ 52, 48,  0x1000, 12 },
	{ 52, 48, 0x10000, 16 },
	{ 48, 48,  0x1000, 12 },
	{ 48, 48, 0x10000, 16 },
	{ 40, 48,  0x1000, 12 },
	{ 40, 48, 0x10000, 16 },
	{  0,  0,  0x1000, 12 },
};
_Static_assert(sizeof(vm_guest_mode_params)/sizeof(struct vm_guest_mode_params) == NUM_VM_MODES,
	       "Missing new mode params?");
163
/*
 * VM Create
 *
 * Input Args:
 *   mode - VM Mode (e.g. VM_MODE_P52V48_4K)
 *   phy_pages - Physical memory pages
 *   perm - permission
 *
 * Output Args: None
 *
 * Return:
 *   Pointer to opaque structure that describes the created VM.
 *
 * Creates a VM with the mode specified by mode (e.g. VM_MODE_P52V48_4K).
 * When phy_pages is non-zero, a memory region of phy_pages physical pages
 * is created and mapped starting at guest physical address 0.  The file
 * descriptor to control the created VM is created with the permissions
 * given by perm (e.g. O_RDWR).
 */
struct kvm_vm *vm_create(enum vm_guest_mode mode, uint64_t phy_pages, int perm)
{
	struct kvm_vm *vm;

	pr_debug("%s: mode='%s' pages='%ld' perm='%d'\n", __func__,
		 vm_guest_mode_string(mode), phy_pages, perm);

	vm = calloc(1, sizeof(*vm));
	TEST_ASSERT(vm != NULL, "Insufficient Memory");

	INIT_LIST_HEAD(&vm->vcpus);
	INIT_LIST_HEAD(&vm->userspace_mem_regions);

	vm->mode = mode;
	vm->type = 0;

	/* Baseline geometry comes from the per-mode table. */
	vm->pa_bits = vm_guest_mode_params[mode].pa_bits;
	vm->va_bits = vm_guest_mode_params[mode].va_bits;
	vm->page_size = vm_guest_mode_params[mode].page_size;
	vm->page_shift = vm_guest_mode_params[mode].page_shift;

	/* Setup mode specific traits. */
	switch (vm->mode) {
	case VM_MODE_P52V48_4K:
		vm->pgtable_levels = 4;
		break;
	case VM_MODE_P52V48_64K:
		vm->pgtable_levels = 3;
		break;
	case VM_MODE_P48V48_4K:
		vm->pgtable_levels = 4;
		break;
	case VM_MODE_P48V48_64K:
		vm->pgtable_levels = 3;
		break;
	case VM_MODE_P40V48_4K:
		vm->pgtable_levels = 4;
		break;
	case VM_MODE_P40V48_64K:
		vm->pgtable_levels = 3;
		break;
	case VM_MODE_PXXV48_4K:
#ifdef __x86_64__
		/* PA/VA widths are not fixed by this mode; read them from CPUID. */
		kvm_get_cpu_address_width(&vm->pa_bits, &vm->va_bits);
		/*
		 * Ignore KVM support for 5-level paging (vm->va_bits == 57),
		 * it doesn't take effect unless a CR4.LA57 is set, which it
		 * isn't for this VM_MODE.
		 */
		TEST_ASSERT(vm->va_bits == 48 || vm->va_bits == 57,
			    "Linear address width (%d bits) not supported",
			    vm->va_bits);
		pr_debug("Guest physical address width detected: %d\n",
			 vm->pa_bits);
		vm->pgtable_levels = 4;
		vm->va_bits = 48;
#else
		TEST_FAIL("VM_MODE_PXXV48_4K not supported on non-x86 platforms");
#endif
		break;
	default:
		TEST_FAIL("Unknown guest mode, mode: 0x%x", mode);
	}

#ifdef __aarch64__
	/* Non-default IPA sizes must be requested explicitly on arm64. */
	if (vm->pa_bits != 40)
		vm->type = KVM_VM_TYPE_ARM_IPA_SIZE(vm->pa_bits);
#endif

	vm_open(vm, perm);

	/*
	 * Limit to VA-bit canonical virtual addresses: mark both the low
	 * half and the sign-extended high half of the VA space as valid.
	 */
	vm->vpages_valid = sparsebit_alloc();
	sparsebit_set_num(vm->vpages_valid,
		0, (1ULL << (vm->va_bits - 1)) >> vm->page_shift);
	sparsebit_set_num(vm->vpages_valid,
		(~((1ULL << (vm->va_bits - 1)) - 1)) >> vm->page_shift,
		(1ULL << (vm->va_bits - 1)) >> vm->page_shift);

	/* Limit physical addresses to PA-bits. */
	vm->max_gfn = ((1ULL << vm->pa_bits) >> vm->page_shift) - 1;

	/* Allocate and setup memory for guest. */
	vm->vpages_mapped = sparsebit_alloc();
	if (phy_pages != 0)
		vm_userspace_mem_region_add(vm, VM_MEM_SRC_ANONYMOUS,
					    0, 0, phy_pages, 0);

	return vm;
}
273
274/*
275 * VM Restart
276 *
277 * Input Args:
278 *   vm - VM that has been released before
279 *   perm - permission
280 *
281 * Output Args: None
282 *
283 * Reopens the file descriptors associated to the VM and reinstates the
284 * global state, such as the irqchip and the memory regions that are mapped
285 * into the guest.
286 */
287void kvm_vm_restart(struct kvm_vm *vmp, int perm)
288{
289	struct userspace_mem_region *region;
290
291	vm_open(vmp, perm);
292	if (vmp->has_irqchip)
293		vm_create_irqchip(vmp);
294
295	list_for_each_entry(region, &vmp->userspace_mem_regions, list) {
296		int ret = ioctl(vmp->fd, KVM_SET_USER_MEMORY_REGION, &region->region);
297		TEST_ASSERT(ret == 0, "KVM_SET_USER_MEMORY_REGION IOCTL failed,\n"
298			    "  rc: %i errno: %i\n"
299			    "  slot: %u flags: 0x%x\n"
300			    "  guest_phys_addr: 0x%llx size: 0x%llx",
301			    ret, errno, region->region.slot,
302			    region->region.flags,
303			    region->region.guest_phys_addr,
304			    region->region.memory_size);
305	}
306}
307
308void kvm_vm_get_dirty_log(struct kvm_vm *vm, int slot, void *log)
309{
310	struct kvm_dirty_log args = { .dirty_bitmap = log, .slot = slot };
311	int ret;
312
313	ret = ioctl(vm->fd, KVM_GET_DIRTY_LOG, &args);
314	TEST_ASSERT(ret == 0, "%s: KVM_GET_DIRTY_LOG failed: %s",
315		    __func__, strerror(-ret));
316}
317
318void kvm_vm_clear_dirty_log(struct kvm_vm *vm, int slot, void *log,
319			    uint64_t first_page, uint32_t num_pages)
320{
321	struct kvm_clear_dirty_log args = { .dirty_bitmap = log, .slot = slot,
322		                            .first_page = first_page,
323	                                    .num_pages = num_pages };
324	int ret;
325
326	ret = ioctl(vm->fd, KVM_CLEAR_DIRTY_LOG, &args);
327	TEST_ASSERT(ret == 0, "%s: KVM_CLEAR_DIRTY_LOG failed: %s",
328		    __func__, strerror(-ret));
329}
330
331/*
332 * Userspace Memory Region Find
333 *
334 * Input Args:
335 *   vm - Virtual Machine
336 *   start - Starting VM physical address
337 *   end - Ending VM physical address, inclusive.
338 *
339 * Output Args: None
340 *
341 * Return:
342 *   Pointer to overlapping region, NULL if no such region.
343 *
344 * Searches for a region with any physical memory that overlaps with
345 * any portion of the guest physical addresses from start to end
346 * inclusive.  If multiple overlapping regions exist, a pointer to any
347 * of the regions is returned.  Null is returned only when no overlapping
348 * region exists.
349 */
350static struct userspace_mem_region *
351userspace_mem_region_find(struct kvm_vm *vm, uint64_t start, uint64_t end)
352{
353	struct userspace_mem_region *region;
354
355	list_for_each_entry(region, &vm->userspace_mem_regions, list) {
356		uint64_t existing_start = region->region.guest_phys_addr;
357		uint64_t existing_end = region->region.guest_phys_addr
358			+ region->region.memory_size - 1;
359		if (start <= existing_end && end >= existing_start)
360			return region;
361	}
362
363	return NULL;
364}
365
366/*
367 * KVM Userspace Memory Region Find
368 *
369 * Input Args:
370 *   vm - Virtual Machine
371 *   start - Starting VM physical address
372 *   end - Ending VM physical address, inclusive.
373 *
374 * Output Args: None
375 *
376 * Return:
377 *   Pointer to overlapping region, NULL if no such region.
378 *
379 * Public interface to userspace_mem_region_find. Allows tests to look up
380 * the memslot datastructure for a given range of guest physical memory.
381 */
382struct kvm_userspace_memory_region *
383kvm_userspace_memory_region_find(struct kvm_vm *vm, uint64_t start,
384				 uint64_t end)
385{
386	struct userspace_mem_region *region;
387
388	region = userspace_mem_region_find(vm, start, end);
389	if (!region)
390		return NULL;
391
392	return &region->region;
393}
394
395/*
396 * VCPU Find
397 *
398 * Input Args:
399 *   vm - Virtual Machine
400 *   vcpuid - VCPU ID
401 *
402 * Output Args: None
403 *
404 * Return:
405 *   Pointer to VCPU structure
406 *
407 * Locates a vcpu structure that describes the VCPU specified by vcpuid and
408 * returns a pointer to it.  Returns NULL if the VM doesn't contain a VCPU
409 * for the specified vcpuid.
410 */
411struct vcpu *vcpu_find(struct kvm_vm *vm, uint32_t vcpuid)
412{
413	struct vcpu *vcpu;
414
415	list_for_each_entry(vcpu, &vm->vcpus, list) {
416		if (vcpu->id == vcpuid)
417			return vcpu;
418	}
419
420	return NULL;
421}
422
423/*
424 * VM VCPU Remove
425 *
426 * Input Args:
427 *   vcpu - VCPU to remove
428 *
429 * Output Args: None
430 *
431 * Return: None, TEST_ASSERT failures for all error conditions
432 *
433 * Removes a vCPU from a VM and frees its resources.
434 */
435static void vm_vcpu_rm(struct vcpu *vcpu)
436{
437	int ret;
438
439	ret = munmap(vcpu->state, sizeof(*vcpu->state));
440	TEST_ASSERT(ret == 0, "munmap of VCPU fd failed, rc: %i "
441		"errno: %i", ret, errno);
442	close(vcpu->fd);
443	TEST_ASSERT(ret == 0, "Close of VCPU fd failed, rc: %i "
444		"errno: %i", ret, errno);
445
446	list_del(&vcpu->list);
447	free(vcpu);
448}
449
450void kvm_vm_release(struct kvm_vm *vmp)
451{
452	struct vcpu *vcpu, *tmp;
453	int ret;
454
455	list_for_each_entry_safe(vcpu, tmp, &vmp->vcpus, list)
456		vm_vcpu_rm(vcpu);
457
458	ret = close(vmp->fd);
459	TEST_ASSERT(ret == 0, "Close of vm fd failed,\n"
460		"  vmp->fd: %i rc: %i errno: %i", vmp->fd, ret, errno);
461
462	close(vmp->kvm_fd);
463	TEST_ASSERT(ret == 0, "Close of /dev/kvm fd failed,\n"
464		"  vmp->kvm_fd: %i rc: %i errno: %i", vmp->kvm_fd, ret, errno);
465}
466
467static void __vm_mem_region_delete(struct kvm_vm *vm,
468				   struct userspace_mem_region *region)
469{
470	int ret;
471
472	list_del(&region->list);
473
474	region->region.memory_size = 0;
475	ret = ioctl(vm->fd, KVM_SET_USER_MEMORY_REGION, &region->region);
476	TEST_ASSERT(ret == 0, "KVM_SET_USER_MEMORY_REGION IOCTL failed, "
477		    "rc: %i errno: %i", ret, errno);
478
479	sparsebit_free(&region->unused_phy_pages);
480	ret = munmap(region->mmap_start, region->mmap_size);
481	TEST_ASSERT(ret == 0, "munmap failed, rc: %i errno: %i", ret, errno);
482
483	free(region);
484}
485
486/*
487 * Destroys and frees the VM pointed to by vmp.
488 */
489void kvm_vm_free(struct kvm_vm *vmp)
490{
491	struct userspace_mem_region *region, *tmp;
492
493	if (vmp == NULL)
494		return;
495
496	/* Free userspace_mem_regions. */
497	list_for_each_entry_safe(region, tmp, &vmp->userspace_mem_regions, list)
498		__vm_mem_region_delete(vmp, region);
499
500	/* Free sparsebit arrays. */
501	sparsebit_free(&vmp->vpages_valid);
502	sparsebit_free(&vmp->vpages_mapped);
503
504	kvm_vm_release(vmp);
505
506	/* Free the structure describing the VM. */
507	free(vmp);
508}
509
/*
 * Memory Compare, host virtual to guest virtual
 *
 * Input Args:
 *   hva - Starting host virtual address
 *   vm - Virtual Machine
 *   gva - Starting guest virtual address
 *   len - number of bytes to compare
 *
 * Output Args: None
 *
 * Input/Output Args: None
 *
 * Return:
 *   Returns 0 if the bytes starting at hva for a length of len
 *   are equal the guest virtual bytes starting at gva.  Returns
 *   a value < 0, if bytes at hva are less than those at gva.
 *   Otherwise a value > 0 is returned.
 *
 * Compares the bytes starting at the host virtual address hva, for
 * a length of len, to the guest bytes starting at the guest virtual
 * address given by gva.  A len of 0 trivially compares equal (returns 0).
 */
int kvm_memcmp_hva_gva(void *hva, struct kvm_vm *vm, vm_vaddr_t gva, size_t len)
{
	size_t amt;

	/*
	 * Compare a batch of bytes until either a match is found
	 * or all the bytes have been compared.
	 */
	for (uintptr_t offset = 0; offset < len; offset += amt) {
		uintptr_t ptr1 = (uintptr_t)hva + offset;

		/*
		 * Determine host address for guest virtual address
		 * at offset.  The guest mapping need not be physically
		 * contiguous, so this is redone each iteration.
		 */
		uintptr_t ptr2 = (uintptr_t)addr_gva2hva(vm, gva + offset);

		/*
		 * Determine amount to compare on this pass.
		 * Don't allow the comparsion to cross a page boundary.
		 */
		amt = len - offset;
		if ((ptr1 >> vm->page_shift) != ((ptr1 + amt) >> vm->page_shift))
			amt = vm->page_size - (ptr1 % vm->page_size);
		if ((ptr2 >> vm->page_shift) != ((ptr2 + amt) >> vm->page_shift))
			amt = vm->page_size - (ptr2 % vm->page_size);

		/* After clamping, both spans must lie within a single page. */
		assert((ptr1 >> vm->page_shift) == ((ptr1 + amt - 1) >> vm->page_shift));
		assert((ptr2 >> vm->page_shift) == ((ptr2 + amt - 1) >> vm->page_shift));

		/*
		 * Perform the comparison.  If there is a difference
		 * return that result to the caller, otherwise need
		 * to continue on looking for a mismatch.
		 */
		int ret = memcmp((void *)ptr1, (void *)ptr2, amt);
		if (ret != 0)
			return ret;
	}

	/*
	 * No mismatch found.  Let the caller know the two memory
	 * areas are equal.
	 */
	return 0;
}
579
/*
 * VM Userspace Memory Region Add
 *
 * Input Args:
 *   vm - Virtual Machine
 *   src_type - Storage source for this region (anonymous, THP, hugetlb).
 *   guest_paddr - Starting guest physical address
 *   slot - KVM region slot
 *   npages - Number of physical pages
 *   flags - KVM memory region flags (e.g. KVM_MEM_LOG_DIRTY_PAGES)
 *
 * Output Args: None
 *
 * Return: None
 *
 * Allocates a memory area of the number of pages specified by npages
 * and maps it to the VM specified by vm, at a starting physical address
 * given by guest_paddr.  The region is created with a KVM region slot
 * given by slot, which must be unique and < KVM_MEM_SLOTS_NUM.  The
 * region is created with the flags given by flags.
 */
void vm_userspace_mem_region_add(struct kvm_vm *vm,
	enum vm_mem_backing_src_type src_type,
	uint64_t guest_paddr, uint32_t slot, uint64_t npages,
	uint32_t flags)
{
	int ret;
	struct userspace_mem_region *region;
	size_t huge_page_size = KVM_UTIL_PGS_PER_HUGEPG * vm->page_size;
	size_t alignment;

	TEST_ASSERT(vm_adjust_num_guest_pages(vm->mode, npages) == npages,
		"Number of guest pages is not compatible with the host. "
		"Try npages=%d", vm_adjust_num_guest_pages(vm->mode, npages));

	TEST_ASSERT((guest_paddr % vm->page_size) == 0, "Guest physical "
		"address not on a page boundary.\n"
		"  guest_paddr: 0x%lx vm->page_size: 0x%x",
		guest_paddr, vm->page_size);
	TEST_ASSERT((((guest_paddr >> vm->page_shift) + npages) - 1)
		<= vm->max_gfn, "Physical range beyond maximum "
		"supported physical address,\n"
		"  guest_paddr: 0x%lx npages: 0x%lx\n"
		"  vm->max_gfn: 0x%lx vm->page_size: 0x%x",
		guest_paddr, npages, vm->max_gfn, vm->page_size);

	/*
	 * Confirm a mem region with an overlapping address doesn't
	 * already exist.
	 */
	region = (struct userspace_mem_region *) userspace_mem_region_find(
		vm, guest_paddr, (guest_paddr + npages * vm->page_size) - 1);
	if (region != NULL)
		TEST_FAIL("overlapping userspace_mem_region already "
			"exists\n"
			"  requested guest_paddr: 0x%lx npages: 0x%lx "
			"page_size: 0x%x\n"
			"  existing guest_paddr: 0x%lx size: 0x%lx",
			guest_paddr, npages, vm->page_size,
			(uint64_t) region->region.guest_phys_addr,
			(uint64_t) region->region.memory_size);

	/* Confirm no region with the requested slot already exists. */
	list_for_each_entry(region, &vm->userspace_mem_regions, list) {
		if (region->region.slot != slot)
			continue;

		TEST_FAIL("A mem region with the requested slot "
			"already exists.\n"
			"  requested slot: %u paddr: 0x%lx npages: 0x%lx\n"
			"  existing slot: %u paddr: 0x%lx size: 0x%lx",
			slot, guest_paddr, npages,
			region->region.slot,
			(uint64_t) region->region.guest_phys_addr,
			(uint64_t) region->region.memory_size);
	}

	/* Allocate and initialize new mem region structure. */
	region = calloc(1, sizeof(*region));
	TEST_ASSERT(region != NULL, "Insufficient Memory");
	region->mmap_size = npages * vm->page_size;

#ifdef __s390x__
	/* On s390x, the host address must be aligned to 1M (due to PGSTEs) */
	alignment = 0x100000;
#else
	alignment = 1;
#endif

	/* THP-backed regions must be aligned to a full huge page. */
	if (src_type == VM_MEM_SRC_ANONYMOUS_THP)
		alignment = max(huge_page_size, alignment);

	/*
	 * Add enough memory to align up if necessary.  The slack is kept
	 * in mmap_size so the later munmap() covers the whole mapping.
	 */
	if (alignment > 1)
		region->mmap_size += alignment;

	region->mmap_start = mmap(NULL, region->mmap_size,
				  PROT_READ | PROT_WRITE,
				  MAP_PRIVATE | MAP_ANONYMOUS
				  | (src_type == VM_MEM_SRC_ANONYMOUS_HUGETLB ? MAP_HUGETLB : 0),
				  -1, 0);
	TEST_ASSERT(region->mmap_start != MAP_FAILED,
		    "test_malloc failed, mmap_start: %p errno: %i",
		    region->mmap_start, errno);

	/* Align host address */
	region->host_mem = align(region->mmap_start, alignment);

	/* As needed perform madvise */
	if (src_type == VM_MEM_SRC_ANONYMOUS || src_type == VM_MEM_SRC_ANONYMOUS_THP) {
		struct stat statbuf;

		/* Probe whether the host kernel has THP configured at all. */
		ret = stat("/sys/kernel/mm/transparent_hugepage", &statbuf);
		TEST_ASSERT(ret == 0 || (ret == -1 && errno == ENOENT),
			    "stat /sys/kernel/mm/transparent_hugepage");

		TEST_ASSERT(ret == 0 || src_type != VM_MEM_SRC_ANONYMOUS_THP,
			    "VM_MEM_SRC_ANONYMOUS_THP requires THP to be configured in the host kernel");

		if (ret == 0) {
			ret = madvise(region->host_mem, npages * vm->page_size,
				      src_type == VM_MEM_SRC_ANONYMOUS ? MADV_NOHUGEPAGE : MADV_HUGEPAGE);
			TEST_ASSERT(ret == 0, "madvise failed, addr: %p length: 0x%lx src_type: %x",
				    region->host_mem, npages * vm->page_size, src_type);
		}
	}

	/* Initially every page of the region is unused guest physical memory. */
	region->unused_phy_pages = sparsebit_alloc();
	sparsebit_set_num(region->unused_phy_pages,
		guest_paddr >> vm->page_shift, npages);
	region->region.slot = slot;
	region->region.flags = flags;
	region->region.guest_phys_addr = guest_paddr;
	region->region.memory_size = npages * vm->page_size;
	region->region.userspace_addr = (uintptr_t) region->host_mem;
	ret = ioctl(vm->fd, KVM_SET_USER_MEMORY_REGION, &region->region);
	TEST_ASSERT(ret == 0, "KVM_SET_USER_MEMORY_REGION IOCTL failed,\n"
		"  rc: %i errno: %i\n"
		"  slot: %u flags: 0x%x\n"
		"  guest_phys_addr: 0x%lx size: 0x%lx",
		ret, errno, slot, flags,
		guest_paddr, (uint64_t) region->region.memory_size);

	/* Add to linked-list of memory regions. */
	list_add(&region->list, &vm->userspace_mem_regions);
}
727
728/*
729 * Memslot to region
730 *
731 * Input Args:
732 *   vm - Virtual Machine
733 *   memslot - KVM memory slot ID
734 *
735 * Output Args: None
736 *
737 * Return:
738 *   Pointer to memory region structure that describe memory region
739 *   using kvm memory slot ID given by memslot.  TEST_ASSERT failure
740 *   on error (e.g. currently no memory region using memslot as a KVM
741 *   memory slot ID).
742 */
743struct userspace_mem_region *
744memslot2region(struct kvm_vm *vm, uint32_t memslot)
745{
746	struct userspace_mem_region *region;
747
748	list_for_each_entry(region, &vm->userspace_mem_regions, list) {
749		if (region->region.slot == memslot)
750			return region;
751	}
752
753	fprintf(stderr, "No mem region with the requested slot found,\n"
754		"  requested slot: %u\n", memslot);
755	fputs("---- vm dump ----\n", stderr);
756	vm_dump(stderr, vm, 2);
757	TEST_FAIL("Mem region not found");
758	return NULL;
759}
760
761/*
762 * VM Memory Region Flags Set
763 *
764 * Input Args:
765 *   vm - Virtual Machine
766 *   flags - Starting guest physical address
767 *
768 * Output Args: None
769 *
770 * Return: None
771 *
772 * Sets the flags of the memory region specified by the value of slot,
773 * to the values given by flags.
774 */
775void vm_mem_region_set_flags(struct kvm_vm *vm, uint32_t slot, uint32_t flags)
776{
777	int ret;
778	struct userspace_mem_region *region;
779
780	region = memslot2region(vm, slot);
781
782	region->region.flags = flags;
783
784	ret = ioctl(vm->fd, KVM_SET_USER_MEMORY_REGION, &region->region);
785
786	TEST_ASSERT(ret == 0, "KVM_SET_USER_MEMORY_REGION IOCTL failed,\n"
787		"  rc: %i errno: %i slot: %u flags: 0x%x",
788		ret, errno, slot, flags);
789}
790
791/*
792 * VM Memory Region Move
793 *
794 * Input Args:
795 *   vm - Virtual Machine
796 *   slot - Slot of the memory region to move
797 *   new_gpa - Starting guest physical address
798 *
799 * Output Args: None
800 *
801 * Return: None
802 *
803 * Change the gpa of a memory region.
804 */
805void vm_mem_region_move(struct kvm_vm *vm, uint32_t slot, uint64_t new_gpa)
806{
807	struct userspace_mem_region *region;
808	int ret;
809
810	region = memslot2region(vm, slot);
811
812	region->region.guest_phys_addr = new_gpa;
813
814	ret = ioctl(vm->fd, KVM_SET_USER_MEMORY_REGION, &region->region);
815
816	TEST_ASSERT(!ret, "KVM_SET_USER_MEMORY_REGION failed\n"
817		    "ret: %i errno: %i slot: %u new_gpa: 0x%lx",
818		    ret, errno, slot, new_gpa);
819}
820
/*
 * VM Memory Region Delete
 *
 * Input Args:
 *   vm - Virtual Machine
 *   slot - Slot of the memory region to delete
 *
 * Output Args: None
 *
 * Return: None
 *
 * Looks up the region by slot (asserting it exists) and deletes it.
 */
void vm_mem_region_delete(struct kvm_vm *vm, uint32_t slot)
{
	struct userspace_mem_region *region = memslot2region(vm, slot);

	__vm_mem_region_delete(vm, region);
}
838
839/*
840 * VCPU mmap Size
841 *
842 * Input Args: None
843 *
844 * Output Args: None
845 *
846 * Return:
847 *   Size of VCPU state
848 *
849 * Returns the size of the structure pointed to by the return value
850 * of vcpu_state().
851 */
852static int vcpu_mmap_sz(void)
853{
854	int dev_fd, ret;
855
856	dev_fd = open(KVM_DEV_PATH, O_RDONLY);
857	if (dev_fd < 0)
858		exit(KSFT_SKIP);
859
860	ret = ioctl(dev_fd, KVM_GET_VCPU_MMAP_SIZE, NULL);
861	TEST_ASSERT(ret >= sizeof(struct kvm_run),
862		"%s KVM_GET_VCPU_MMAP_SIZE ioctl failed, rc: %i errno: %i",
863		__func__, ret, errno);
864
865	close(dev_fd);
866
867	return ret;
868}
869
870/*
871 * VM VCPU Add
872 *
873 * Input Args:
874 *   vm - Virtual Machine
875 *   vcpuid - VCPU ID
876 *
877 * Output Args: None
878 *
879 * Return: None
880 *
881 * Adds a virtual CPU to the VM specified by vm with the ID given by vcpuid.
882 * No additional VCPU setup is done.
883 */
884void vm_vcpu_add(struct kvm_vm *vm, uint32_t vcpuid)
885{
886	struct vcpu *vcpu;
887
888	/* Confirm a vcpu with the specified id doesn't already exist. */
889	vcpu = vcpu_find(vm, vcpuid);
890	if (vcpu != NULL)
891		TEST_FAIL("vcpu with the specified id "
892			"already exists,\n"
893			"  requested vcpuid: %u\n"
894			"  existing vcpuid: %u state: %p",
895			vcpuid, vcpu->id, vcpu->state);
896
897	/* Allocate and initialize new vcpu structure. */
898	vcpu = calloc(1, sizeof(*vcpu));
899	TEST_ASSERT(vcpu != NULL, "Insufficient Memory");
900	vcpu->id = vcpuid;
901	vcpu->fd = ioctl(vm->fd, KVM_CREATE_VCPU, vcpuid);
902	TEST_ASSERT(vcpu->fd >= 0, "KVM_CREATE_VCPU failed, rc: %i errno: %i",
903		vcpu->fd, errno);
904
905	TEST_ASSERT(vcpu_mmap_sz() >= sizeof(*vcpu->state), "vcpu mmap size "
906		"smaller than expected, vcpu_mmap_sz: %i expected_min: %zi",
907		vcpu_mmap_sz(), sizeof(*vcpu->state));
908	vcpu->state = (struct kvm_run *) mmap(NULL, sizeof(*vcpu->state),
909		PROT_READ | PROT_WRITE, MAP_SHARED, vcpu->fd, 0);
910	TEST_ASSERT(vcpu->state != MAP_FAILED, "mmap vcpu_state failed, "
911		"vcpu id: %u errno: %i", vcpuid, errno);
912
913	/* Add to linked-list of VCPUs. */
914	list_add(&vcpu->list, &vm->vcpus);
915}
916
/*
 * VM Virtual Address Unused Gap
 *
 * Input Args:
 *   vm - Virtual Machine
 *   sz - Size (bytes)
 *   vaddr_min - Minimum Virtual Address
 *
 * Output Args: None
 *
 * Return:
 *   Lowest virtual address at or above vaddr_min, with at least
 *   sz unused bytes.  TEST_ASSERT failure if no area of at least
 *   size sz is available.
 *
 * Within the VM specified by vm, locates the lowest starting virtual
 * address >= vaddr_min, that has at least sz unallocated bytes.  A
 * TEST_ASSERT failure occurs for invalid input or no area of at least
 * sz unallocated bytes >= vaddr_min is available.
 */
static vm_vaddr_t vm_vaddr_unused_gap(struct kvm_vm *vm, size_t sz,
				      vm_vaddr_t vaddr_min)
{
	/* Round the byte count up to whole pages. */
	uint64_t pages = (sz + vm->page_size - 1) >> vm->page_shift;

	/* Determine lowest permitted virtual page index. */
	uint64_t pgidx_start = (vaddr_min + vm->page_size - 1) >> vm->page_shift;
	/* Rounding up vaddr_min may have wrapped; treat that as "no gap". */
	if ((pgidx_start * vm->page_size) < vaddr_min)
		goto no_va_found;

	/* Loop over section with enough valid virtual page indexes. */
	if (!sparsebit_is_set_num(vm->vpages_valid,
		pgidx_start, pages))
		pgidx_start = sparsebit_next_set_num(vm->vpages_valid,
			pgidx_start, pages);
	do {
		/*
		 * Are there enough unused virtual pages available at
		 * the currently proposed starting virtual page index.
		 * If not, adjust proposed starting index to next
		 * possible.
		 */
		if (sparsebit_is_clear_num(vm->vpages_mapped,
			pgidx_start, pages))
			goto va_found;
		pgidx_start = sparsebit_next_clear_num(vm->vpages_mapped,
			pgidx_start, pages);
		/* sparsebit_next_*_num() returns 0 when no further range exists. */
		if (pgidx_start == 0)
			goto no_va_found;

		/*
		 * If needed, adjust proposed starting virtual address,
		 * to next range of valid virtual addresses.
		 */
		if (!sparsebit_is_set_num(vm->vpages_valid,
			pgidx_start, pages)) {
			pgidx_start = sparsebit_next_set_num(
				vm->vpages_valid, pgidx_start, pages);
			if (pgidx_start == 0)
				goto no_va_found;
		}
	} while (pgidx_start != 0);

no_va_found:
	TEST_FAIL("No vaddr of specified pages available, pages: 0x%lx", pages);

	/* NOT REACHED */
	return -1;

va_found:
	/* Sanity-check the candidate range before handing it out. */
	TEST_ASSERT(sparsebit_is_set_num(vm->vpages_valid,
		pgidx_start, pages),
		"Unexpected, invalid virtual page index range,\n"
		"  pgidx_start: 0x%lx\n"
		"  pages: 0x%lx",
		pgidx_start, pages);
	TEST_ASSERT(sparsebit_is_clear_num(vm->vpages_mapped,
		pgidx_start, pages),
		"Unexpected, pages already mapped,\n"
		"  pgidx_start: 0x%lx\n"
		"  pages: 0x%lx",
		pgidx_start, pages);

	return pgidx_start * vm->page_size;
}
1002
1003/*
1004 * VM Virtual Address Allocate
1005 *
1006 * Input Args:
1007 *   vm - Virtual Machine
1008 *   sz - Size in bytes
1009 *   vaddr_min - Minimum starting virtual address
1010 *   data_memslot - Memory region slot for data pages
1011 *   pgd_memslot - Memory region slot for new virtual translation tables
1012 *
1013 * Output Args: None
1014 *
1015 * Return:
1016 *   Starting guest virtual address
1017 *
1018 * Allocates at least sz bytes within the virtual address space of the vm
1019 * given by vm.  The allocated bytes are mapped to a virtual address >=
1020 * the address given by vaddr_min.  Note that each allocation uses a
1021 * a unique set of pages, with the minimum real allocation being at least
1022 * a page.
1023 */
1024vm_vaddr_t vm_vaddr_alloc(struct kvm_vm *vm, size_t sz, vm_vaddr_t vaddr_min,
1025			  uint32_t data_memslot, uint32_t pgd_memslot)
1026{
1027	uint64_t pages = (sz >> vm->page_shift) + ((sz % vm->page_size) != 0);
1028
1029	virt_pgd_alloc(vm, pgd_memslot);
1030
1031	/*
1032	 * Find an unused range of virtual page addresses of at least
1033	 * pages in length.
1034	 */
1035	vm_vaddr_t vaddr_start = vm_vaddr_unused_gap(vm, sz, vaddr_min);
1036
1037	/* Map the virtual pages. */
1038	for (vm_vaddr_t vaddr = vaddr_start; pages > 0;
1039		pages--, vaddr += vm->page_size) {
1040		vm_paddr_t paddr;
1041
1042		paddr = vm_phy_page_alloc(vm,
1043				KVM_UTIL_MIN_PFN * vm->page_size, data_memslot);
1044
1045		virt_pg_map(vm, vaddr, paddr, pgd_memslot);
1046
1047		sparsebit_set(vm->vpages_mapped,
1048			vaddr >> vm->page_shift);
1049	}
1050
1051	return vaddr_start;
1052}
1053
1054/*
1055 * Map a range of VM virtual address to the VM's physical address
1056 *
1057 * Input Args:
1058 *   vm - Virtual Machine
1059 *   vaddr - Virtuall address to map
1060 *   paddr - VM Physical Address
1061 *   npages - The number of pages to map
1062 *   pgd_memslot - Memory region slot for new virtual translation tables
1063 *
1064 * Output Args: None
1065 *
1066 * Return: None
1067 *
1068 * Within the VM given by @vm, creates a virtual translation for
1069 * @npages starting at @vaddr to the page range starting at @paddr.
1070 */
1071void virt_map(struct kvm_vm *vm, uint64_t vaddr, uint64_t paddr,
1072	      unsigned int npages, uint32_t pgd_memslot)
1073{
1074	size_t page_size = vm->page_size;
1075	size_t size = npages * page_size;
1076
1077	TEST_ASSERT(vaddr + size > vaddr, "Vaddr overflow");
1078	TEST_ASSERT(paddr + size > paddr, "Paddr overflow");
1079
1080	while (npages--) {
1081		virt_pg_map(vm, vaddr, paddr, pgd_memslot);
1082		vaddr += page_size;
1083		paddr += page_size;
1084	}
1085}
1086
1087/*
1088 * Address VM Physical to Host Virtual
1089 *
1090 * Input Args:
1091 *   vm - Virtual Machine
1092 *   gpa - VM physical address
1093 *
1094 * Output Args: None
1095 *
1096 * Return:
1097 *   Equivalent host virtual address
1098 *
1099 * Locates the memory region containing the VM physical address given
1100 * by gpa, within the VM given by vm.  When found, the host virtual
1101 * address providing the memory to the vm physical address is returned.
1102 * A TEST_ASSERT failure occurs if no region containing gpa exists.
1103 */
1104void *addr_gpa2hva(struct kvm_vm *vm, vm_paddr_t gpa)
1105{
1106	struct userspace_mem_region *region;
1107
1108	list_for_each_entry(region, &vm->userspace_mem_regions, list) {
1109		if ((gpa >= region->region.guest_phys_addr)
1110			&& (gpa <= (region->region.guest_phys_addr
1111				+ region->region.memory_size - 1)))
1112			return (void *) ((uintptr_t) region->host_mem
1113				+ (gpa - region->region.guest_phys_addr));
1114	}
1115
1116	TEST_FAIL("No vm physical memory at 0x%lx", gpa);
1117	return NULL;
1118}
1119
1120/*
1121 * Address Host Virtual to VM Physical
1122 *
1123 * Input Args:
1124 *   vm - Virtual Machine
1125 *   hva - Host virtual address
1126 *
1127 * Output Args: None
1128 *
1129 * Return:
1130 *   Equivalent VM physical address
1131 *
1132 * Locates the memory region containing the host virtual address given
1133 * by hva, within the VM given by vm.  When found, the equivalent
1134 * VM physical address is returned. A TEST_ASSERT failure occurs if no
1135 * region containing hva exists.
1136 */
1137vm_paddr_t addr_hva2gpa(struct kvm_vm *vm, void *hva)
1138{
1139	struct userspace_mem_region *region;
1140
1141	list_for_each_entry(region, &vm->userspace_mem_regions, list) {
1142		if ((hva >= region->host_mem)
1143			&& (hva <= (region->host_mem
1144				+ region->region.memory_size - 1)))
1145			return (vm_paddr_t) ((uintptr_t)
1146				region->region.guest_phys_addr
1147				+ (hva - (uintptr_t) region->host_mem));
1148	}
1149
1150	TEST_FAIL("No mapping to a guest physical address, hva: %p", hva);
1151	return -1;
1152}
1153
1154/*
1155 * VM Create IRQ Chip
1156 *
1157 * Input Args:
1158 *   vm - Virtual Machine
1159 *
1160 * Output Args: None
1161 *
1162 * Return: None
1163 *
1164 * Creates an interrupt controller chip for the VM specified by vm.
1165 */
1166void vm_create_irqchip(struct kvm_vm *vm)
1167{
1168	int ret;
1169
1170	ret = ioctl(vm->fd, KVM_CREATE_IRQCHIP, 0);
1171	TEST_ASSERT(ret == 0, "KVM_CREATE_IRQCHIP IOCTL failed, "
1172		"rc: %i errno: %i", ret, errno);
1173
1174	vm->has_irqchip = true;
1175}
1176
1177/*
1178 * VM VCPU State
1179 *
1180 * Input Args:
1181 *   vm - Virtual Machine
1182 *   vcpuid - VCPU ID
1183 *
1184 * Output Args: None
1185 *
1186 * Return:
1187 *   Pointer to structure that describes the state of the VCPU.
1188 *
1189 * Locates and returns a pointer to a structure that describes the
1190 * state of the VCPU with the given vcpuid.
1191 */
1192struct kvm_run *vcpu_state(struct kvm_vm *vm, uint32_t vcpuid)
1193{
1194	struct vcpu *vcpu = vcpu_find(vm, vcpuid);
1195	TEST_ASSERT(vcpu != NULL, "vcpu not found, vcpuid: %u", vcpuid);
1196
1197	return vcpu->state;
1198}
1199
1200/*
1201 * VM VCPU Run
1202 *
1203 * Input Args:
1204 *   vm - Virtual Machine
1205 *   vcpuid - VCPU ID
1206 *
1207 * Output Args: None
1208 *
1209 * Return: None
1210 *
1211 * Switch to executing the code for the VCPU given by vcpuid, within the VM
1212 * given by vm.
1213 */
1214void vcpu_run(struct kvm_vm *vm, uint32_t vcpuid)
1215{
1216	int ret = _vcpu_run(vm, vcpuid);
1217	TEST_ASSERT(ret == 0, "KVM_RUN IOCTL failed, "
1218		"rc: %i errno: %i", ret, errno);
1219}
1220
1221int _vcpu_run(struct kvm_vm *vm, uint32_t vcpuid)
1222{
1223	struct vcpu *vcpu = vcpu_find(vm, vcpuid);
1224	int rc;
1225
1226	TEST_ASSERT(vcpu != NULL, "vcpu not found, vcpuid: %u", vcpuid);
1227	do {
1228		rc = ioctl(vcpu->fd, KVM_RUN, NULL);
1229	} while (rc == -1 && errno == EINTR);
1230
1231	assert_on_unhandled_exception(vm, vcpuid);
1232
1233	return rc;
1234}
1235
/*
 * Completes any in-flight I/O emulation for the VCPU given by vcpuid
 * without re-entering guest code: KVM_RUN is issued with
 * run->immediate_exit set, so the ioctl is expected to return -1 with
 * errno == EINTR once KVM has processed the pending completion.
 */
void vcpu_run_complete_io(struct kvm_vm *vm, uint32_t vcpuid)
{
	struct vcpu *vcpu = vcpu_find(vm, vcpuid);
	int ret;

	TEST_ASSERT(vcpu != NULL, "vcpu not found, vcpuid: %u", vcpuid);

	vcpu->state->immediate_exit = 1;
	ret = ioctl(vcpu->fd, KVM_RUN, NULL);
	/* Clear the flag so subsequent KVM_RUN calls enter the guest. */
	vcpu->state->immediate_exit = 0;

	TEST_ASSERT(ret == -1 && errno == EINTR,
		    "KVM_RUN IOCTL didn't exit immediately, rc: %i, errno: %i",
		    ret, errno);
}
1251
1252void vcpu_set_guest_debug(struct kvm_vm *vm, uint32_t vcpuid,
1253			  struct kvm_guest_debug *debug)
1254{
1255	struct vcpu *vcpu = vcpu_find(vm, vcpuid);
1256	int ret = ioctl(vcpu->fd, KVM_SET_GUEST_DEBUG, debug);
1257
1258	TEST_ASSERT(ret == 0, "KVM_SET_GUEST_DEBUG failed: %d", ret);
1259}
1260
1261/*
1262 * VM VCPU Set MP State
1263 *
1264 * Input Args:
1265 *   vm - Virtual Machine
1266 *   vcpuid - VCPU ID
1267 *   mp_state - mp_state to be set
1268 *
1269 * Output Args: None
1270 *
1271 * Return: None
1272 *
1273 * Sets the MP state of the VCPU given by vcpuid, to the state given
1274 * by mp_state.
1275 */
1276void vcpu_set_mp_state(struct kvm_vm *vm, uint32_t vcpuid,
1277		       struct kvm_mp_state *mp_state)
1278{
1279	struct vcpu *vcpu = vcpu_find(vm, vcpuid);
1280	int ret;
1281
1282	TEST_ASSERT(vcpu != NULL, "vcpu not found, vcpuid: %u", vcpuid);
1283
1284	ret = ioctl(vcpu->fd, KVM_SET_MP_STATE, mp_state);
1285	TEST_ASSERT(ret == 0, "KVM_SET_MP_STATE IOCTL failed, "
1286		"rc: %i errno: %i", ret, errno);
1287}
1288
1289/*
1290 * VM VCPU Get Reg List
1291 *
1292 * Input Args:
1293 *   vm - Virtual Machine
1294 *   vcpuid - VCPU ID
1295 *
1296 * Output Args:
1297 *   None
1298 *
1299 * Return:
1300 *   A pointer to an allocated struct kvm_reg_list
1301 *
1302 * Get the list of guest registers which are supported for
1303 * KVM_GET_ONE_REG/KVM_SET_ONE_REG calls
1304 */
1305struct kvm_reg_list *vcpu_get_reg_list(struct kvm_vm *vm, uint32_t vcpuid)
1306{
1307	struct kvm_reg_list reg_list_n = { .n = 0 }, *reg_list;
1308	int ret;
1309
1310	ret = _vcpu_ioctl(vm, vcpuid, KVM_GET_REG_LIST, &reg_list_n);
1311	TEST_ASSERT(ret == -1 && errno == E2BIG, "KVM_GET_REG_LIST n=0");
1312	reg_list = calloc(1, sizeof(*reg_list) + reg_list_n.n * sizeof(__u64));
1313	reg_list->n = reg_list_n.n;
1314	vcpu_ioctl(vm, vcpuid, KVM_GET_REG_LIST, reg_list);
1315	return reg_list;
1316}
1317
1318/*
1319 * VM VCPU Regs Get
1320 *
1321 * Input Args:
1322 *   vm - Virtual Machine
1323 *   vcpuid - VCPU ID
1324 *
1325 * Output Args:
1326 *   regs - current state of VCPU regs
1327 *
1328 * Return: None
1329 *
1330 * Obtains the current register state for the VCPU specified by vcpuid
1331 * and stores it at the location given by regs.
1332 */
1333void vcpu_regs_get(struct kvm_vm *vm, uint32_t vcpuid, struct kvm_regs *regs)
1334{
1335	struct vcpu *vcpu = vcpu_find(vm, vcpuid);
1336	int ret;
1337
1338	TEST_ASSERT(vcpu != NULL, "vcpu not found, vcpuid: %u", vcpuid);
1339
1340	ret = ioctl(vcpu->fd, KVM_GET_REGS, regs);
1341	TEST_ASSERT(ret == 0, "KVM_GET_REGS failed, rc: %i errno: %i",
1342		ret, errno);
1343}
1344
1345/*
1346 * VM VCPU Regs Set
1347 *
1348 * Input Args:
1349 *   vm - Virtual Machine
1350 *   vcpuid - VCPU ID
1351 *   regs - Values to set VCPU regs to
1352 *
1353 * Output Args: None
1354 *
1355 * Return: None
1356 *
1357 * Sets the regs of the VCPU specified by vcpuid to the values
1358 * given by regs.
1359 */
1360void vcpu_regs_set(struct kvm_vm *vm, uint32_t vcpuid, struct kvm_regs *regs)
1361{
1362	struct vcpu *vcpu = vcpu_find(vm, vcpuid);
1363	int ret;
1364
1365	TEST_ASSERT(vcpu != NULL, "vcpu not found, vcpuid: %u", vcpuid);
1366
1367	ret = ioctl(vcpu->fd, KVM_SET_REGS, regs);
1368	TEST_ASSERT(ret == 0, "KVM_SET_REGS failed, rc: %i errno: %i",
1369		ret, errno);
1370}
1371
1372#ifdef __KVM_HAVE_VCPU_EVENTS
1373void vcpu_events_get(struct kvm_vm *vm, uint32_t vcpuid,
1374		     struct kvm_vcpu_events *events)
1375{
1376	struct vcpu *vcpu = vcpu_find(vm, vcpuid);
1377	int ret;
1378
1379	TEST_ASSERT(vcpu != NULL, "vcpu not found, vcpuid: %u", vcpuid);
1380
1381	ret = ioctl(vcpu->fd, KVM_GET_VCPU_EVENTS, events);
1382	TEST_ASSERT(ret == 0, "KVM_GET_VCPU_EVENTS, failed, rc: %i errno: %i",
1383		ret, errno);
1384}
1385
1386void vcpu_events_set(struct kvm_vm *vm, uint32_t vcpuid,
1387		     struct kvm_vcpu_events *events)
1388{
1389	struct vcpu *vcpu = vcpu_find(vm, vcpuid);
1390	int ret;
1391
1392	TEST_ASSERT(vcpu != NULL, "vcpu not found, vcpuid: %u", vcpuid);
1393
1394	ret = ioctl(vcpu->fd, KVM_SET_VCPU_EVENTS, events);
1395	TEST_ASSERT(ret == 0, "KVM_SET_VCPU_EVENTS, failed, rc: %i errno: %i",
1396		ret, errno);
1397}
1398#endif
1399
1400#ifdef __x86_64__
1401void vcpu_nested_state_get(struct kvm_vm *vm, uint32_t vcpuid,
1402			   struct kvm_nested_state *state)
1403{
1404	struct vcpu *vcpu = vcpu_find(vm, vcpuid);
1405	int ret;
1406
1407	TEST_ASSERT(vcpu != NULL, "vcpu not found, vcpuid: %u", vcpuid);
1408
1409	ret = ioctl(vcpu->fd, KVM_GET_NESTED_STATE, state);
1410	TEST_ASSERT(ret == 0,
1411		"KVM_SET_NESTED_STATE failed, ret: %i errno: %i",
1412		ret, errno);
1413}
1414
1415int vcpu_nested_state_set(struct kvm_vm *vm, uint32_t vcpuid,
1416			  struct kvm_nested_state *state, bool ignore_error)
1417{
1418	struct vcpu *vcpu = vcpu_find(vm, vcpuid);
1419	int ret;
1420
1421	TEST_ASSERT(vcpu != NULL, "vcpu not found, vcpuid: %u", vcpuid);
1422
1423	ret = ioctl(vcpu->fd, KVM_SET_NESTED_STATE, state);
1424	if (!ignore_error) {
1425		TEST_ASSERT(ret == 0,
1426			"KVM_SET_NESTED_STATE failed, ret: %i errno: %i",
1427			ret, errno);
1428	}
1429
1430	return ret;
1431}
1432#endif
1433
1434/*
1435 * VM VCPU System Regs Get
1436 *
1437 * Input Args:
1438 *   vm - Virtual Machine
1439 *   vcpuid - VCPU ID
1440 *
1441 * Output Args:
1442 *   sregs - current state of VCPU system regs
1443 *
1444 * Return: None
1445 *
1446 * Obtains the current system register state for the VCPU specified by
1447 * vcpuid and stores it at the location given by sregs.
1448 */
1449void vcpu_sregs_get(struct kvm_vm *vm, uint32_t vcpuid, struct kvm_sregs *sregs)
1450{
1451	struct vcpu *vcpu = vcpu_find(vm, vcpuid);
1452	int ret;
1453
1454	TEST_ASSERT(vcpu != NULL, "vcpu not found, vcpuid: %u", vcpuid);
1455
1456	ret = ioctl(vcpu->fd, KVM_GET_SREGS, sregs);
1457	TEST_ASSERT(ret == 0, "KVM_GET_SREGS failed, rc: %i errno: %i",
1458		ret, errno);
1459}
1460
1461/*
1462 * VM VCPU System Regs Set
1463 *
1464 * Input Args:
1465 *   vm - Virtual Machine
1466 *   vcpuid - VCPU ID
1467 *   sregs - Values to set VCPU system regs to
1468 *
1469 * Output Args: None
1470 *
1471 * Return: None
1472 *
1473 * Sets the system regs of the VCPU specified by vcpuid to the values
1474 * given by sregs.
1475 */
1476void vcpu_sregs_set(struct kvm_vm *vm, uint32_t vcpuid, struct kvm_sregs *sregs)
1477{
1478	int ret = _vcpu_sregs_set(vm, vcpuid, sregs);
1479	TEST_ASSERT(ret == 0, "KVM_RUN IOCTL failed, "
1480		"rc: %i errno: %i", ret, errno);
1481}
1482
1483int _vcpu_sregs_set(struct kvm_vm *vm, uint32_t vcpuid, struct kvm_sregs *sregs)
1484{
1485	struct vcpu *vcpu = vcpu_find(vm, vcpuid);
1486
1487	TEST_ASSERT(vcpu != NULL, "vcpu not found, vcpuid: %u", vcpuid);
1488
1489	return ioctl(vcpu->fd, KVM_SET_SREGS, sregs);
1490}
1491
1492void vcpu_fpu_get(struct kvm_vm *vm, uint32_t vcpuid, struct kvm_fpu *fpu)
1493{
1494	int ret;
1495
1496	ret = _vcpu_ioctl(vm, vcpuid, KVM_GET_FPU, fpu);
1497	TEST_ASSERT(ret == 0, "KVM_GET_FPU failed, rc: %i errno: %i (%s)",
1498		    ret, errno, strerror(errno));
1499}
1500
1501void vcpu_fpu_set(struct kvm_vm *vm, uint32_t vcpuid, struct kvm_fpu *fpu)
1502{
1503	int ret;
1504
1505	ret = _vcpu_ioctl(vm, vcpuid, KVM_SET_FPU, fpu);
1506	TEST_ASSERT(ret == 0, "KVM_SET_FPU failed, rc: %i errno: %i (%s)",
1507		    ret, errno, strerror(errno));
1508}
1509
1510void vcpu_get_reg(struct kvm_vm *vm, uint32_t vcpuid, struct kvm_one_reg *reg)
1511{
1512	int ret;
1513
1514	ret = _vcpu_ioctl(vm, vcpuid, KVM_GET_ONE_REG, reg);
1515	TEST_ASSERT(ret == 0, "KVM_GET_ONE_REG failed, rc: %i errno: %i (%s)",
1516		    ret, errno, strerror(errno));
1517}
1518
1519void vcpu_set_reg(struct kvm_vm *vm, uint32_t vcpuid, struct kvm_one_reg *reg)
1520{
1521	int ret;
1522
1523	ret = _vcpu_ioctl(vm, vcpuid, KVM_SET_ONE_REG, reg);
1524	TEST_ASSERT(ret == 0, "KVM_SET_ONE_REG failed, rc: %i errno: %i (%s)",
1525		    ret, errno, strerror(errno));
1526}
1527
1528/*
1529 * VCPU Ioctl
1530 *
1531 * Input Args:
1532 *   vm - Virtual Machine
1533 *   vcpuid - VCPU ID
1534 *   cmd - Ioctl number
1535 *   arg - Argument to pass to the ioctl
1536 *
1537 * Return: None
1538 *
1539 * Issues an arbitrary ioctl on a VCPU fd.
1540 */
1541void vcpu_ioctl(struct kvm_vm *vm, uint32_t vcpuid,
1542		unsigned long cmd, void *arg)
1543{
1544	int ret;
1545
1546	ret = _vcpu_ioctl(vm, vcpuid, cmd, arg);
1547	TEST_ASSERT(ret == 0, "vcpu ioctl %lu failed, rc: %i errno: %i (%s)",
1548		cmd, ret, errno, strerror(errno));
1549}
1550
1551int _vcpu_ioctl(struct kvm_vm *vm, uint32_t vcpuid,
1552		unsigned long cmd, void *arg)
1553{
1554	struct vcpu *vcpu = vcpu_find(vm, vcpuid);
1555	int ret;
1556
1557	TEST_ASSERT(vcpu != NULL, "vcpu not found, vcpuid: %u", vcpuid);
1558
1559	ret = ioctl(vcpu->fd, cmd, arg);
1560
1561	return ret;
1562}
1563
1564/*
1565 * VM Ioctl
1566 *
1567 * Input Args:
1568 *   vm - Virtual Machine
1569 *   cmd - Ioctl number
1570 *   arg - Argument to pass to the ioctl
1571 *
1572 * Return: None
1573 *
1574 * Issues an arbitrary ioctl on a VM fd.
1575 */
1576void vm_ioctl(struct kvm_vm *vm, unsigned long cmd, void *arg)
1577{
1578	int ret;
1579
1580	ret = ioctl(vm->fd, cmd, arg);
1581	TEST_ASSERT(ret == 0, "vm ioctl %lu failed, rc: %i errno: %i (%s)",
1582		cmd, ret, errno, strerror(errno));
1583}
1584
1585/*
1586 * VM Dump
1587 *
1588 * Input Args:
1589 *   vm - Virtual Machine
1590 *   indent - Left margin indent amount
1591 *
1592 * Output Args:
1593 *   stream - Output FILE stream
1594 *
1595 * Return: None
1596 *
1597 * Dumps the current state of the VM given by vm, to the FILE stream
1598 * given by stream.
1599 */
1600void vm_dump(FILE *stream, struct kvm_vm *vm, uint8_t indent)
1601{
1602	struct userspace_mem_region *region;
1603	struct vcpu *vcpu;
1604
1605	fprintf(stream, "%*smode: 0x%x\n", indent, "", vm->mode);
1606	fprintf(stream, "%*sfd: %i\n", indent, "", vm->fd);
1607	fprintf(stream, "%*spage_size: 0x%x\n", indent, "", vm->page_size);
1608	fprintf(stream, "%*sMem Regions:\n", indent, "");
1609	list_for_each_entry(region, &vm->userspace_mem_regions, list) {
1610		fprintf(stream, "%*sguest_phys: 0x%lx size: 0x%lx "
1611			"host_virt: %p\n", indent + 2, "",
1612			(uint64_t) region->region.guest_phys_addr,
1613			(uint64_t) region->region.memory_size,
1614			region->host_mem);
1615		fprintf(stream, "%*sunused_phy_pages: ", indent + 2, "");
1616		sparsebit_dump(stream, region->unused_phy_pages, 0);
1617	}
1618	fprintf(stream, "%*sMapped Virtual Pages:\n", indent, "");
1619	sparsebit_dump(stream, vm->vpages_mapped, indent + 2);
1620	fprintf(stream, "%*spgd_created: %u\n", indent, "",
1621		vm->pgd_created);
1622	if (vm->pgd_created) {
1623		fprintf(stream, "%*sVirtual Translation Tables:\n",
1624			indent + 2, "");
1625		virt_dump(stream, vm, indent + 4);
1626	}
1627	fprintf(stream, "%*sVCPUs:\n", indent, "");
1628	list_for_each_entry(vcpu, &vm->vcpus, list)
1629		vcpu_dump(stream, vm, vcpu->id, indent + 2);
1630}
1631
/*
 * Known KVM exit reasons: maps KVM_EXIT_* numeric values to printable
 * names, consumed by exit_reason_str().
 */
static struct exit_reason {
	unsigned int reason;	/* KVM_EXIT_* value */
	const char *name;	/* printable name for that value */
} exit_reasons_known[] = {
	{KVM_EXIT_UNKNOWN, "UNKNOWN"},
	{KVM_EXIT_EXCEPTION, "EXCEPTION"},
	{KVM_EXIT_IO, "IO"},
	{KVM_EXIT_HYPERCALL, "HYPERCALL"},
	{KVM_EXIT_DEBUG, "DEBUG"},
	{KVM_EXIT_HLT, "HLT"},
	{KVM_EXIT_MMIO, "MMIO"},
	{KVM_EXIT_IRQ_WINDOW_OPEN, "IRQ_WINDOW_OPEN"},
	{KVM_EXIT_SHUTDOWN, "SHUTDOWN"},
	{KVM_EXIT_FAIL_ENTRY, "FAIL_ENTRY"},
	{KVM_EXIT_INTR, "INTR"},
	{KVM_EXIT_SET_TPR, "SET_TPR"},
	{KVM_EXIT_TPR_ACCESS, "TPR_ACCESS"},
	{KVM_EXIT_S390_SIEIC, "S390_SIEIC"},
	{KVM_EXIT_S390_RESET, "S390_RESET"},
	{KVM_EXIT_DCR, "DCR"},
	{KVM_EXIT_NMI, "NMI"},
	{KVM_EXIT_INTERNAL_ERROR, "INTERNAL_ERROR"},
	{KVM_EXIT_OSI, "OSI"},
	{KVM_EXIT_PAPR_HCALL, "PAPR_HCALL"},
#ifdef KVM_EXIT_MEMORY_NOT_PRESENT
	{KVM_EXIT_MEMORY_NOT_PRESENT, "MEMORY_NOT_PRESENT"},
#endif
};
1661
1662/*
1663 * Exit Reason String
1664 *
1665 * Input Args:
1666 *   exit_reason - Exit reason
1667 *
1668 * Output Args: None
1669 *
1670 * Return:
1671 *   Constant string pointer describing the exit reason.
1672 *
1673 * Locates and returns a constant string that describes the KVM exit
1674 * reason given by exit_reason.  If no such string is found, a constant
1675 * string of "Unknown" is returned.
1676 */
1677const char *exit_reason_str(unsigned int exit_reason)
1678{
1679	unsigned int n1;
1680
1681	for (n1 = 0; n1 < ARRAY_SIZE(exit_reasons_known); n1++) {
1682		if (exit_reason == exit_reasons_known[n1].reason)
1683			return exit_reasons_known[n1].name;
1684	}
1685
1686	return "Unknown";
1687}
1688
1689/*
1690 * Physical Contiguous Page Allocator
1691 *
1692 * Input Args:
1693 *   vm - Virtual Machine
1694 *   num - number of pages
1695 *   paddr_min - Physical address minimum
1696 *   memslot - Memory region to allocate page from
1697 *
1698 * Output Args: None
1699 *
1700 * Return:
1701 *   Starting physical address
1702 *
1703 * Within the VM specified by vm, locates a range of available physical
1704 * pages at or above paddr_min. If found, the pages are marked as in use
1705 * and their base address is returned. A TEST_ASSERT failure occurs if
1706 * not enough pages are available at or above paddr_min.
1707 */
1708vm_paddr_t vm_phy_pages_alloc(struct kvm_vm *vm, size_t num,
1709			      vm_paddr_t paddr_min, uint32_t memslot)
1710{
1711	struct userspace_mem_region *region;
1712	sparsebit_idx_t pg, base;
1713
1714	TEST_ASSERT(num > 0, "Must allocate at least one page");
1715
1716	TEST_ASSERT((paddr_min % vm->page_size) == 0, "Min physical address "
1717		"not divisible by page size.\n"
1718		"  paddr_min: 0x%lx page_size: 0x%x",
1719		paddr_min, vm->page_size);
1720
1721	region = memslot2region(vm, memslot);
1722	base = pg = paddr_min >> vm->page_shift;
1723
1724	do {
1725		for (; pg < base + num; ++pg) {
1726			if (!sparsebit_is_set(region->unused_phy_pages, pg)) {
1727				base = pg = sparsebit_next_set(region->unused_phy_pages, pg);
1728				break;
1729			}
1730		}
1731	} while (pg && pg != base + num);
1732
1733	if (pg == 0) {
1734		fprintf(stderr, "No guest physical page available, "
1735			"paddr_min: 0x%lx page_size: 0x%x memslot: %u\n",
1736			paddr_min, vm->page_size, memslot);
1737		fputs("---- vm dump ----\n", stderr);
1738		vm_dump(stderr, vm, 2);
1739		abort();
1740	}
1741
1742	for (pg = base; pg < base + num; ++pg)
1743		sparsebit_clear(region->unused_phy_pages, pg);
1744
1745	return base * vm->page_size;
1746}
1747
/* Allocates exactly one physical page; see vm_phy_pages_alloc(). */
vm_paddr_t vm_phy_page_alloc(struct kvm_vm *vm, vm_paddr_t paddr_min,
			     uint32_t memslot)
{
	return vm_phy_pages_alloc(vm, 1, paddr_min, memslot);
}
1753
1754/*
1755 * Address Guest Virtual to Host Virtual
1756 *
1757 * Input Args:
1758 *   vm - Virtual Machine
1759 *   gva - VM virtual address
1760 *
1761 * Output Args: None
1762 *
1763 * Return:
1764 *   Equivalent host virtual address
1765 */
1766void *addr_gva2hva(struct kvm_vm *vm, vm_vaddr_t gva)
1767{
1768	return addr_gpa2hva(vm, addr_gva2gpa(vm, gva));
1769}
1770
1771/*
1772 * Is Unrestricted Guest
1773 *
1774 * Input Args:
1775 *   vm - Virtual Machine
1776 *
1777 * Output Args: None
1778 *
1779 * Return: True if the unrestricted guest is set to 'Y', otherwise return false.
1780 *
1781 * Check if the unrestricted guest flag is enabled.
1782 */
1783bool vm_is_unrestricted_guest(struct kvm_vm *vm)
1784{
1785	char val = 'N';
1786	size_t count;
1787	FILE *f;
1788
1789	if (vm == NULL) {
1790		/* Ensure that the KVM vendor-specific module is loaded. */
1791		f = fopen(KVM_DEV_PATH, "r");
1792		TEST_ASSERT(f != NULL, "Error in opening KVM dev file: %d",
1793			    errno);
1794		fclose(f);
1795	}
1796
1797	f = fopen("/sys/module/kvm_intel/parameters/unrestricted_guest", "r");
1798	if (f) {
1799		count = fread(&val, sizeof(char), 1, f);
1800		TEST_ASSERT(count == 1, "Unable to read from param file.");
1801		fclose(f);
1802	}
1803
1804	return val == 'Y';
1805}
1806
/* Returns the guest page size, in bytes, of the given VM. */
unsigned int vm_get_page_size(struct kvm_vm *vm)
{
	return vm->page_size;
}
1811
/* Returns the guest page shift (log2 of the guest page size) of the VM. */
unsigned int vm_get_page_shift(struct kvm_vm *vm)
{
	return vm->page_shift;
}
1816
/* Returns the maximum guest frame number of the VM. */
unsigned int vm_get_max_gfn(struct kvm_vm *vm)
{
	return vm->max_gfn;
}
1821
/* Returns the VM file descriptor. */
int vm_get_fd(struct kvm_vm *vm)
{
	return vm->fd;
}
1826
/*
 * Converts num_pages pages of size 2^page_shift into the equivalent
 * count of pages of size 2^new_page_shift.  When converting to larger
 * pages, a partial page is counted only if ceil is true.
 */
static unsigned int vm_calc_num_pages(unsigned int num_pages,
				      unsigned int page_shift,
				      unsigned int new_page_shift,
				      bool ceil)
{
	unsigned int n;

	if (page_shift >= new_page_shift)
		return num_pages * (1 << (page_shift - new_page_shift));

	/*
	 * Compute the ratio only on this branch: evaluating
	 * 1 << (new_page_shift - page_shift) unconditionally invoked
	 * undefined behavior (oversized shift after unsigned wrap)
	 * whenever page_shift > new_page_shift.
	 */
	n = 1 << (new_page_shift - page_shift);
	return num_pages / n + !!(ceil && num_pages % n);
}
1839
/* Host page shift: log2 of the host page size. */
static inline int getpageshift(void)
{
	/* Page sizes are nonzero powers of two, so count trailing zeros. */
	return __builtin_ctz(getpagesize());
}
1844
/*
 * Converts a guest-page count for the given mode into the equivalent
 * number of host pages, rounding up (partial host pages count).
 */
unsigned int
vm_num_host_pages(enum vm_guest_mode mode, unsigned int num_guest_pages)
{
	return vm_calc_num_pages(num_guest_pages,
				 vm_guest_mode_params[mode].page_shift,
				 getpageshift(), true);
}
1852
/*
 * Converts a host-page count into the equivalent number of guest pages
 * for the given mode, rounding down (partial guest pages don't count).
 */
unsigned int
vm_num_guest_pages(enum vm_guest_mode mode, unsigned int num_host_pages)
{
	return vm_calc_num_pages(num_host_pages, getpageshift(),
				 vm_guest_mode_params[mode].page_shift, false);
}
1859
/*
 * Returns the number of guest pages needed to hold size bytes in the
 * given mode, adjusted via vm_adjust_num_guest_pages().
 */
unsigned int vm_calc_num_guest_pages(enum vm_guest_mode mode, size_t size)
{
	unsigned int n;
	n = DIV_ROUND_UP(size, vm_guest_mode_params[mode].page_size);
	return vm_adjust_num_guest_pages(mode, n);
}
1866