// SPDX-License-Identifier: GPL-2.0
/*
 *  KVM guest address space mapping code
 *
 *    Copyright IBM Corp. 2007, 2020
 *    Author(s): Martin Schwidefsky <schwidefsky@de.ibm.com>
 *		 David Hildenbrand <david@redhat.com>
 *		 Janosch Frank <frankja@linux.vnet.ibm.com>
 */
1062306a36Sopenharmony_ci
1162306a36Sopenharmony_ci#include <linux/kernel.h>
1262306a36Sopenharmony_ci#include <linux/pagewalk.h>
1362306a36Sopenharmony_ci#include <linux/swap.h>
1462306a36Sopenharmony_ci#include <linux/smp.h>
1562306a36Sopenharmony_ci#include <linux/spinlock.h>
1662306a36Sopenharmony_ci#include <linux/slab.h>
1762306a36Sopenharmony_ci#include <linux/swapops.h>
1862306a36Sopenharmony_ci#include <linux/ksm.h>
1962306a36Sopenharmony_ci#include <linux/mman.h>
2062306a36Sopenharmony_ci#include <linux/pgtable.h>
2162306a36Sopenharmony_ci
2262306a36Sopenharmony_ci#include <asm/pgalloc.h>
2362306a36Sopenharmony_ci#include <asm/gmap.h>
2462306a36Sopenharmony_ci#include <asm/page.h>
2562306a36Sopenharmony_ci#include <asm/tlb.h>
2662306a36Sopenharmony_ci
/*
 * Flag constant; it is not referenced in the visible part of this file.
 * NOTE(review): the name suggests it tags a shadow table as "fake" -
 * confirm against its users.
 */
#define GMAP_SHADOW_FAKE_TABLE 1ULL
2862306a36Sopenharmony_ci
2962306a36Sopenharmony_cistatic struct page *gmap_alloc_crst(void)
3062306a36Sopenharmony_ci{
3162306a36Sopenharmony_ci	struct page *page;
3262306a36Sopenharmony_ci
3362306a36Sopenharmony_ci	page = alloc_pages(GFP_KERNEL_ACCOUNT, CRST_ALLOC_ORDER);
3462306a36Sopenharmony_ci	if (!page)
3562306a36Sopenharmony_ci		return NULL;
3662306a36Sopenharmony_ci	arch_set_page_dat(page, CRST_ALLOC_ORDER);
3762306a36Sopenharmony_ci	return page;
3862306a36Sopenharmony_ci}
3962306a36Sopenharmony_ci
4062306a36Sopenharmony_ci/**
4162306a36Sopenharmony_ci * gmap_alloc - allocate and initialize a guest address space
4262306a36Sopenharmony_ci * @limit: maximum address of the gmap address space
4362306a36Sopenharmony_ci *
4462306a36Sopenharmony_ci * Returns a guest address space structure.
4562306a36Sopenharmony_ci */
4662306a36Sopenharmony_cistatic struct gmap *gmap_alloc(unsigned long limit)
4762306a36Sopenharmony_ci{
4862306a36Sopenharmony_ci	struct gmap *gmap;
4962306a36Sopenharmony_ci	struct page *page;
5062306a36Sopenharmony_ci	unsigned long *table;
5162306a36Sopenharmony_ci	unsigned long etype, atype;
5262306a36Sopenharmony_ci
5362306a36Sopenharmony_ci	if (limit < _REGION3_SIZE) {
5462306a36Sopenharmony_ci		limit = _REGION3_SIZE - 1;
5562306a36Sopenharmony_ci		atype = _ASCE_TYPE_SEGMENT;
5662306a36Sopenharmony_ci		etype = _SEGMENT_ENTRY_EMPTY;
5762306a36Sopenharmony_ci	} else if (limit < _REGION2_SIZE) {
5862306a36Sopenharmony_ci		limit = _REGION2_SIZE - 1;
5962306a36Sopenharmony_ci		atype = _ASCE_TYPE_REGION3;
6062306a36Sopenharmony_ci		etype = _REGION3_ENTRY_EMPTY;
6162306a36Sopenharmony_ci	} else if (limit < _REGION1_SIZE) {
6262306a36Sopenharmony_ci		limit = _REGION1_SIZE - 1;
6362306a36Sopenharmony_ci		atype = _ASCE_TYPE_REGION2;
6462306a36Sopenharmony_ci		etype = _REGION2_ENTRY_EMPTY;
6562306a36Sopenharmony_ci	} else {
6662306a36Sopenharmony_ci		limit = -1UL;
6762306a36Sopenharmony_ci		atype = _ASCE_TYPE_REGION1;
6862306a36Sopenharmony_ci		etype = _REGION1_ENTRY_EMPTY;
6962306a36Sopenharmony_ci	}
7062306a36Sopenharmony_ci	gmap = kzalloc(sizeof(struct gmap), GFP_KERNEL_ACCOUNT);
7162306a36Sopenharmony_ci	if (!gmap)
7262306a36Sopenharmony_ci		goto out;
7362306a36Sopenharmony_ci	INIT_LIST_HEAD(&gmap->crst_list);
7462306a36Sopenharmony_ci	INIT_LIST_HEAD(&gmap->children);
7562306a36Sopenharmony_ci	INIT_LIST_HEAD(&gmap->pt_list);
7662306a36Sopenharmony_ci	INIT_RADIX_TREE(&gmap->guest_to_host, GFP_KERNEL_ACCOUNT);
7762306a36Sopenharmony_ci	INIT_RADIX_TREE(&gmap->host_to_guest, GFP_ATOMIC | __GFP_ACCOUNT);
7862306a36Sopenharmony_ci	INIT_RADIX_TREE(&gmap->host_to_rmap, GFP_ATOMIC | __GFP_ACCOUNT);
7962306a36Sopenharmony_ci	spin_lock_init(&gmap->guest_table_lock);
8062306a36Sopenharmony_ci	spin_lock_init(&gmap->shadow_lock);
8162306a36Sopenharmony_ci	refcount_set(&gmap->ref_count, 1);
8262306a36Sopenharmony_ci	page = gmap_alloc_crst();
8362306a36Sopenharmony_ci	if (!page)
8462306a36Sopenharmony_ci		goto out_free;
8562306a36Sopenharmony_ci	page->index = 0;
8662306a36Sopenharmony_ci	list_add(&page->lru, &gmap->crst_list);
8762306a36Sopenharmony_ci	table = page_to_virt(page);
8862306a36Sopenharmony_ci	crst_table_init(table, etype);
8962306a36Sopenharmony_ci	gmap->table = table;
9062306a36Sopenharmony_ci	gmap->asce = atype | _ASCE_TABLE_LENGTH |
9162306a36Sopenharmony_ci		_ASCE_USER_BITS | __pa(table);
9262306a36Sopenharmony_ci	gmap->asce_end = limit;
9362306a36Sopenharmony_ci	return gmap;
9462306a36Sopenharmony_ci
9562306a36Sopenharmony_ciout_free:
9662306a36Sopenharmony_ci	kfree(gmap);
9762306a36Sopenharmony_ciout:
9862306a36Sopenharmony_ci	return NULL;
9962306a36Sopenharmony_ci}
10062306a36Sopenharmony_ci
10162306a36Sopenharmony_ci/**
10262306a36Sopenharmony_ci * gmap_create - create a guest address space
10362306a36Sopenharmony_ci * @mm: pointer to the parent mm_struct
10462306a36Sopenharmony_ci * @limit: maximum size of the gmap address space
10562306a36Sopenharmony_ci *
10662306a36Sopenharmony_ci * Returns a guest address space structure.
10762306a36Sopenharmony_ci */
10862306a36Sopenharmony_cistruct gmap *gmap_create(struct mm_struct *mm, unsigned long limit)
10962306a36Sopenharmony_ci{
11062306a36Sopenharmony_ci	struct gmap *gmap;
11162306a36Sopenharmony_ci	unsigned long gmap_asce;
11262306a36Sopenharmony_ci
11362306a36Sopenharmony_ci	gmap = gmap_alloc(limit);
11462306a36Sopenharmony_ci	if (!gmap)
11562306a36Sopenharmony_ci		return NULL;
11662306a36Sopenharmony_ci	gmap->mm = mm;
11762306a36Sopenharmony_ci	spin_lock(&mm->context.lock);
11862306a36Sopenharmony_ci	list_add_rcu(&gmap->list, &mm->context.gmap_list);
11962306a36Sopenharmony_ci	if (list_is_singular(&mm->context.gmap_list))
12062306a36Sopenharmony_ci		gmap_asce = gmap->asce;
12162306a36Sopenharmony_ci	else
12262306a36Sopenharmony_ci		gmap_asce = -1UL;
12362306a36Sopenharmony_ci	WRITE_ONCE(mm->context.gmap_asce, gmap_asce);
12462306a36Sopenharmony_ci	spin_unlock(&mm->context.lock);
12562306a36Sopenharmony_ci	return gmap;
12662306a36Sopenharmony_ci}
12762306a36Sopenharmony_ciEXPORT_SYMBOL_GPL(gmap_create);
12862306a36Sopenharmony_ci
12962306a36Sopenharmony_cistatic void gmap_flush_tlb(struct gmap *gmap)
13062306a36Sopenharmony_ci{
13162306a36Sopenharmony_ci	if (MACHINE_HAS_IDTE)
13262306a36Sopenharmony_ci		__tlb_flush_idte(gmap->asce);
13362306a36Sopenharmony_ci	else
13462306a36Sopenharmony_ci		__tlb_flush_global();
13562306a36Sopenharmony_ci}
13662306a36Sopenharmony_ci
13762306a36Sopenharmony_cistatic void gmap_radix_tree_free(struct radix_tree_root *root)
13862306a36Sopenharmony_ci{
13962306a36Sopenharmony_ci	struct radix_tree_iter iter;
14062306a36Sopenharmony_ci	unsigned long indices[16];
14162306a36Sopenharmony_ci	unsigned long index;
14262306a36Sopenharmony_ci	void __rcu **slot;
14362306a36Sopenharmony_ci	int i, nr;
14462306a36Sopenharmony_ci
14562306a36Sopenharmony_ci	/* A radix tree is freed by deleting all of its entries */
14662306a36Sopenharmony_ci	index = 0;
14762306a36Sopenharmony_ci	do {
14862306a36Sopenharmony_ci		nr = 0;
14962306a36Sopenharmony_ci		radix_tree_for_each_slot(slot, root, &iter, index) {
15062306a36Sopenharmony_ci			indices[nr] = iter.index;
15162306a36Sopenharmony_ci			if (++nr == 16)
15262306a36Sopenharmony_ci				break;
15362306a36Sopenharmony_ci		}
15462306a36Sopenharmony_ci		for (i = 0; i < nr; i++) {
15562306a36Sopenharmony_ci			index = indices[i];
15662306a36Sopenharmony_ci			radix_tree_delete(root, index);
15762306a36Sopenharmony_ci		}
15862306a36Sopenharmony_ci	} while (nr > 0);
15962306a36Sopenharmony_ci}
16062306a36Sopenharmony_ci
16162306a36Sopenharmony_cistatic void gmap_rmap_radix_tree_free(struct radix_tree_root *root)
16262306a36Sopenharmony_ci{
16362306a36Sopenharmony_ci	struct gmap_rmap *rmap, *rnext, *head;
16462306a36Sopenharmony_ci	struct radix_tree_iter iter;
16562306a36Sopenharmony_ci	unsigned long indices[16];
16662306a36Sopenharmony_ci	unsigned long index;
16762306a36Sopenharmony_ci	void __rcu **slot;
16862306a36Sopenharmony_ci	int i, nr;
16962306a36Sopenharmony_ci
17062306a36Sopenharmony_ci	/* A radix tree is freed by deleting all of its entries */
17162306a36Sopenharmony_ci	index = 0;
17262306a36Sopenharmony_ci	do {
17362306a36Sopenharmony_ci		nr = 0;
17462306a36Sopenharmony_ci		radix_tree_for_each_slot(slot, root, &iter, index) {
17562306a36Sopenharmony_ci			indices[nr] = iter.index;
17662306a36Sopenharmony_ci			if (++nr == 16)
17762306a36Sopenharmony_ci				break;
17862306a36Sopenharmony_ci		}
17962306a36Sopenharmony_ci		for (i = 0; i < nr; i++) {
18062306a36Sopenharmony_ci			index = indices[i];
18162306a36Sopenharmony_ci			head = radix_tree_delete(root, index);
18262306a36Sopenharmony_ci			gmap_for_each_rmap_safe(rmap, rnext, head)
18362306a36Sopenharmony_ci				kfree(rmap);
18462306a36Sopenharmony_ci		}
18562306a36Sopenharmony_ci	} while (nr > 0);
18662306a36Sopenharmony_ci}
18762306a36Sopenharmony_ci
18862306a36Sopenharmony_ci/**
18962306a36Sopenharmony_ci * gmap_free - free a guest address space
19062306a36Sopenharmony_ci * @gmap: pointer to the guest address space structure
19162306a36Sopenharmony_ci *
19262306a36Sopenharmony_ci * No locks required. There are no references to this gmap anymore.
19362306a36Sopenharmony_ci */
19462306a36Sopenharmony_cistatic void gmap_free(struct gmap *gmap)
19562306a36Sopenharmony_ci{
19662306a36Sopenharmony_ci	struct page *page, *next;
19762306a36Sopenharmony_ci
19862306a36Sopenharmony_ci	/* Flush tlb of all gmaps (if not already done for shadows) */
19962306a36Sopenharmony_ci	if (!(gmap_is_shadow(gmap) && gmap->removed))
20062306a36Sopenharmony_ci		gmap_flush_tlb(gmap);
20162306a36Sopenharmony_ci	/* Free all segment & region tables. */
20262306a36Sopenharmony_ci	list_for_each_entry_safe(page, next, &gmap->crst_list, lru)
20362306a36Sopenharmony_ci		__free_pages(page, CRST_ALLOC_ORDER);
20462306a36Sopenharmony_ci	gmap_radix_tree_free(&gmap->guest_to_host);
20562306a36Sopenharmony_ci	gmap_radix_tree_free(&gmap->host_to_guest);
20662306a36Sopenharmony_ci
20762306a36Sopenharmony_ci	/* Free additional data for a shadow gmap */
20862306a36Sopenharmony_ci	if (gmap_is_shadow(gmap)) {
20962306a36Sopenharmony_ci		/* Free all page tables. */
21062306a36Sopenharmony_ci		list_for_each_entry_safe(page, next, &gmap->pt_list, lru)
21162306a36Sopenharmony_ci			page_table_free_pgste(page);
21262306a36Sopenharmony_ci		gmap_rmap_radix_tree_free(&gmap->host_to_rmap);
21362306a36Sopenharmony_ci		/* Release reference to the parent */
21462306a36Sopenharmony_ci		gmap_put(gmap->parent);
21562306a36Sopenharmony_ci	}
21662306a36Sopenharmony_ci
21762306a36Sopenharmony_ci	kfree(gmap);
21862306a36Sopenharmony_ci}
21962306a36Sopenharmony_ci
22062306a36Sopenharmony_ci/**
22162306a36Sopenharmony_ci * gmap_get - increase reference counter for guest address space
22262306a36Sopenharmony_ci * @gmap: pointer to the guest address space structure
22362306a36Sopenharmony_ci *
22462306a36Sopenharmony_ci * Returns the gmap pointer
22562306a36Sopenharmony_ci */
22662306a36Sopenharmony_cistruct gmap *gmap_get(struct gmap *gmap)
22762306a36Sopenharmony_ci{
22862306a36Sopenharmony_ci	refcount_inc(&gmap->ref_count);
22962306a36Sopenharmony_ci	return gmap;
23062306a36Sopenharmony_ci}
23162306a36Sopenharmony_ciEXPORT_SYMBOL_GPL(gmap_get);
23262306a36Sopenharmony_ci
23362306a36Sopenharmony_ci/**
23462306a36Sopenharmony_ci * gmap_put - decrease reference counter for guest address space
23562306a36Sopenharmony_ci * @gmap: pointer to the guest address space structure
23662306a36Sopenharmony_ci *
23762306a36Sopenharmony_ci * If the reference counter reaches zero the guest address space is freed.
23862306a36Sopenharmony_ci */
23962306a36Sopenharmony_civoid gmap_put(struct gmap *gmap)
24062306a36Sopenharmony_ci{
24162306a36Sopenharmony_ci	if (refcount_dec_and_test(&gmap->ref_count))
24262306a36Sopenharmony_ci		gmap_free(gmap);
24362306a36Sopenharmony_ci}
24462306a36Sopenharmony_ciEXPORT_SYMBOL_GPL(gmap_put);
24562306a36Sopenharmony_ci
24662306a36Sopenharmony_ci/**
24762306a36Sopenharmony_ci * gmap_remove - remove a guest address space but do not free it yet
24862306a36Sopenharmony_ci * @gmap: pointer to the guest address space structure
24962306a36Sopenharmony_ci */
25062306a36Sopenharmony_civoid gmap_remove(struct gmap *gmap)
25162306a36Sopenharmony_ci{
25262306a36Sopenharmony_ci	struct gmap *sg, *next;
25362306a36Sopenharmony_ci	unsigned long gmap_asce;
25462306a36Sopenharmony_ci
25562306a36Sopenharmony_ci	/* Remove all shadow gmaps linked to this gmap */
25662306a36Sopenharmony_ci	if (!list_empty(&gmap->children)) {
25762306a36Sopenharmony_ci		spin_lock(&gmap->shadow_lock);
25862306a36Sopenharmony_ci		list_for_each_entry_safe(sg, next, &gmap->children, list) {
25962306a36Sopenharmony_ci			list_del(&sg->list);
26062306a36Sopenharmony_ci			gmap_put(sg);
26162306a36Sopenharmony_ci		}
26262306a36Sopenharmony_ci		spin_unlock(&gmap->shadow_lock);
26362306a36Sopenharmony_ci	}
26462306a36Sopenharmony_ci	/* Remove gmap from the pre-mm list */
26562306a36Sopenharmony_ci	spin_lock(&gmap->mm->context.lock);
26662306a36Sopenharmony_ci	list_del_rcu(&gmap->list);
26762306a36Sopenharmony_ci	if (list_empty(&gmap->mm->context.gmap_list))
26862306a36Sopenharmony_ci		gmap_asce = 0;
26962306a36Sopenharmony_ci	else if (list_is_singular(&gmap->mm->context.gmap_list))
27062306a36Sopenharmony_ci		gmap_asce = list_first_entry(&gmap->mm->context.gmap_list,
27162306a36Sopenharmony_ci					     struct gmap, list)->asce;
27262306a36Sopenharmony_ci	else
27362306a36Sopenharmony_ci		gmap_asce = -1UL;
27462306a36Sopenharmony_ci	WRITE_ONCE(gmap->mm->context.gmap_asce, gmap_asce);
27562306a36Sopenharmony_ci	spin_unlock(&gmap->mm->context.lock);
27662306a36Sopenharmony_ci	synchronize_rcu();
27762306a36Sopenharmony_ci	/* Put reference */
27862306a36Sopenharmony_ci	gmap_put(gmap);
27962306a36Sopenharmony_ci}
28062306a36Sopenharmony_ciEXPORT_SYMBOL_GPL(gmap_remove);
28162306a36Sopenharmony_ci
28262306a36Sopenharmony_ci/**
28362306a36Sopenharmony_ci * gmap_enable - switch primary space to the guest address space
28462306a36Sopenharmony_ci * @gmap: pointer to the guest address space structure
28562306a36Sopenharmony_ci */
28662306a36Sopenharmony_civoid gmap_enable(struct gmap *gmap)
28762306a36Sopenharmony_ci{
28862306a36Sopenharmony_ci	S390_lowcore.gmap = (unsigned long) gmap;
28962306a36Sopenharmony_ci}
29062306a36Sopenharmony_ciEXPORT_SYMBOL_GPL(gmap_enable);
29162306a36Sopenharmony_ci
29262306a36Sopenharmony_ci/**
29362306a36Sopenharmony_ci * gmap_disable - switch back to the standard primary address space
29462306a36Sopenharmony_ci * @gmap: pointer to the guest address space structure
29562306a36Sopenharmony_ci */
29662306a36Sopenharmony_civoid gmap_disable(struct gmap *gmap)
29762306a36Sopenharmony_ci{
29862306a36Sopenharmony_ci	S390_lowcore.gmap = 0UL;
29962306a36Sopenharmony_ci}
30062306a36Sopenharmony_ciEXPORT_SYMBOL_GPL(gmap_disable);
30162306a36Sopenharmony_ci
30262306a36Sopenharmony_ci/**
30362306a36Sopenharmony_ci * gmap_get_enabled - get a pointer to the currently enabled gmap
30462306a36Sopenharmony_ci *
30562306a36Sopenharmony_ci * Returns a pointer to the currently enabled gmap. 0 if none is enabled.
30662306a36Sopenharmony_ci */
30762306a36Sopenharmony_cistruct gmap *gmap_get_enabled(void)
30862306a36Sopenharmony_ci{
30962306a36Sopenharmony_ci	return (struct gmap *) S390_lowcore.gmap;
31062306a36Sopenharmony_ci}
31162306a36Sopenharmony_ciEXPORT_SYMBOL_GPL(gmap_get_enabled);
31262306a36Sopenharmony_ci
31362306a36Sopenharmony_ci/*
31462306a36Sopenharmony_ci * gmap_alloc_table is assumed to be called with mmap_lock held
31562306a36Sopenharmony_ci */
31662306a36Sopenharmony_cistatic int gmap_alloc_table(struct gmap *gmap, unsigned long *table,
31762306a36Sopenharmony_ci			    unsigned long init, unsigned long gaddr)
31862306a36Sopenharmony_ci{
31962306a36Sopenharmony_ci	struct page *page;
32062306a36Sopenharmony_ci	unsigned long *new;
32162306a36Sopenharmony_ci
32262306a36Sopenharmony_ci	/* since we dont free the gmap table until gmap_free we can unlock */
32362306a36Sopenharmony_ci	page = gmap_alloc_crst();
32462306a36Sopenharmony_ci	if (!page)
32562306a36Sopenharmony_ci		return -ENOMEM;
32662306a36Sopenharmony_ci	new = page_to_virt(page);
32762306a36Sopenharmony_ci	crst_table_init(new, init);
32862306a36Sopenharmony_ci	spin_lock(&gmap->guest_table_lock);
32962306a36Sopenharmony_ci	if (*table & _REGION_ENTRY_INVALID) {
33062306a36Sopenharmony_ci		list_add(&page->lru, &gmap->crst_list);
33162306a36Sopenharmony_ci		*table = __pa(new) | _REGION_ENTRY_LENGTH |
33262306a36Sopenharmony_ci			(*table & _REGION_ENTRY_TYPE_MASK);
33362306a36Sopenharmony_ci		page->index = gaddr;
33462306a36Sopenharmony_ci		page = NULL;
33562306a36Sopenharmony_ci	}
33662306a36Sopenharmony_ci	spin_unlock(&gmap->guest_table_lock);
33762306a36Sopenharmony_ci	if (page)
33862306a36Sopenharmony_ci		__free_pages(page, CRST_ALLOC_ORDER);
33962306a36Sopenharmony_ci	return 0;
34062306a36Sopenharmony_ci}
34162306a36Sopenharmony_ci
34262306a36Sopenharmony_ci/**
34362306a36Sopenharmony_ci * __gmap_segment_gaddr - find virtual address from segment pointer
34462306a36Sopenharmony_ci * @entry: pointer to a segment table entry in the guest address space
34562306a36Sopenharmony_ci *
34662306a36Sopenharmony_ci * Returns the virtual address in the guest address space for the segment
34762306a36Sopenharmony_ci */
34862306a36Sopenharmony_cistatic unsigned long __gmap_segment_gaddr(unsigned long *entry)
34962306a36Sopenharmony_ci{
35062306a36Sopenharmony_ci	struct page *page;
35162306a36Sopenharmony_ci	unsigned long offset;
35262306a36Sopenharmony_ci
35362306a36Sopenharmony_ci	offset = (unsigned long) entry / sizeof(unsigned long);
35462306a36Sopenharmony_ci	offset = (offset & (PTRS_PER_PMD - 1)) * PMD_SIZE;
35562306a36Sopenharmony_ci	page = pmd_pgtable_page((pmd_t *) entry);
35662306a36Sopenharmony_ci	return page->index + offset;
35762306a36Sopenharmony_ci}
35862306a36Sopenharmony_ci
35962306a36Sopenharmony_ci/**
36062306a36Sopenharmony_ci * __gmap_unlink_by_vmaddr - unlink a single segment via a host address
36162306a36Sopenharmony_ci * @gmap: pointer to the guest address space structure
36262306a36Sopenharmony_ci * @vmaddr: address in the host process address space
36362306a36Sopenharmony_ci *
36462306a36Sopenharmony_ci * Returns 1 if a TLB flush is required
36562306a36Sopenharmony_ci */
36662306a36Sopenharmony_cistatic int __gmap_unlink_by_vmaddr(struct gmap *gmap, unsigned long vmaddr)
36762306a36Sopenharmony_ci{
36862306a36Sopenharmony_ci	unsigned long *entry;
36962306a36Sopenharmony_ci	int flush = 0;
37062306a36Sopenharmony_ci
37162306a36Sopenharmony_ci	BUG_ON(gmap_is_shadow(gmap));
37262306a36Sopenharmony_ci	spin_lock(&gmap->guest_table_lock);
37362306a36Sopenharmony_ci	entry = radix_tree_delete(&gmap->host_to_guest, vmaddr >> PMD_SHIFT);
37462306a36Sopenharmony_ci	if (entry) {
37562306a36Sopenharmony_ci		flush = (*entry != _SEGMENT_ENTRY_EMPTY);
37662306a36Sopenharmony_ci		*entry = _SEGMENT_ENTRY_EMPTY;
37762306a36Sopenharmony_ci	}
37862306a36Sopenharmony_ci	spin_unlock(&gmap->guest_table_lock);
37962306a36Sopenharmony_ci	return flush;
38062306a36Sopenharmony_ci}
38162306a36Sopenharmony_ci
38262306a36Sopenharmony_ci/**
38362306a36Sopenharmony_ci * __gmap_unmap_by_gaddr - unmap a single segment via a guest address
38462306a36Sopenharmony_ci * @gmap: pointer to the guest address space structure
38562306a36Sopenharmony_ci * @gaddr: address in the guest address space
38662306a36Sopenharmony_ci *
38762306a36Sopenharmony_ci * Returns 1 if a TLB flush is required
38862306a36Sopenharmony_ci */
38962306a36Sopenharmony_cistatic int __gmap_unmap_by_gaddr(struct gmap *gmap, unsigned long gaddr)
39062306a36Sopenharmony_ci{
39162306a36Sopenharmony_ci	unsigned long vmaddr;
39262306a36Sopenharmony_ci
39362306a36Sopenharmony_ci	vmaddr = (unsigned long) radix_tree_delete(&gmap->guest_to_host,
39462306a36Sopenharmony_ci						   gaddr >> PMD_SHIFT);
39562306a36Sopenharmony_ci	return vmaddr ? __gmap_unlink_by_vmaddr(gmap, vmaddr) : 0;
39662306a36Sopenharmony_ci}
39762306a36Sopenharmony_ci
39862306a36Sopenharmony_ci/**
39962306a36Sopenharmony_ci * gmap_unmap_segment - unmap segment from the guest address space
40062306a36Sopenharmony_ci * @gmap: pointer to the guest address space structure
40162306a36Sopenharmony_ci * @to: address in the guest address space
40262306a36Sopenharmony_ci * @len: length of the memory area to unmap
40362306a36Sopenharmony_ci *
40462306a36Sopenharmony_ci * Returns 0 if the unmap succeeded, -EINVAL if not.
40562306a36Sopenharmony_ci */
40662306a36Sopenharmony_ciint gmap_unmap_segment(struct gmap *gmap, unsigned long to, unsigned long len)
40762306a36Sopenharmony_ci{
40862306a36Sopenharmony_ci	unsigned long off;
40962306a36Sopenharmony_ci	int flush;
41062306a36Sopenharmony_ci
41162306a36Sopenharmony_ci	BUG_ON(gmap_is_shadow(gmap));
41262306a36Sopenharmony_ci	if ((to | len) & (PMD_SIZE - 1))
41362306a36Sopenharmony_ci		return -EINVAL;
41462306a36Sopenharmony_ci	if (len == 0 || to + len < to)
41562306a36Sopenharmony_ci		return -EINVAL;
41662306a36Sopenharmony_ci
41762306a36Sopenharmony_ci	flush = 0;
41862306a36Sopenharmony_ci	mmap_write_lock(gmap->mm);
41962306a36Sopenharmony_ci	for (off = 0; off < len; off += PMD_SIZE)
42062306a36Sopenharmony_ci		flush |= __gmap_unmap_by_gaddr(gmap, to + off);
42162306a36Sopenharmony_ci	mmap_write_unlock(gmap->mm);
42262306a36Sopenharmony_ci	if (flush)
42362306a36Sopenharmony_ci		gmap_flush_tlb(gmap);
42462306a36Sopenharmony_ci	return 0;
42562306a36Sopenharmony_ci}
42662306a36Sopenharmony_ciEXPORT_SYMBOL_GPL(gmap_unmap_segment);
42762306a36Sopenharmony_ci
42862306a36Sopenharmony_ci/**
42962306a36Sopenharmony_ci * gmap_map_segment - map a segment to the guest address space
43062306a36Sopenharmony_ci * @gmap: pointer to the guest address space structure
43162306a36Sopenharmony_ci * @from: source address in the parent address space
43262306a36Sopenharmony_ci * @to: target address in the guest address space
43362306a36Sopenharmony_ci * @len: length of the memory area to map
43462306a36Sopenharmony_ci *
43562306a36Sopenharmony_ci * Returns 0 if the mmap succeeded, -EINVAL or -ENOMEM if not.
43662306a36Sopenharmony_ci */
43762306a36Sopenharmony_ciint gmap_map_segment(struct gmap *gmap, unsigned long from,
43862306a36Sopenharmony_ci		     unsigned long to, unsigned long len)
43962306a36Sopenharmony_ci{
44062306a36Sopenharmony_ci	unsigned long off;
44162306a36Sopenharmony_ci	int flush;
44262306a36Sopenharmony_ci
44362306a36Sopenharmony_ci	BUG_ON(gmap_is_shadow(gmap));
44462306a36Sopenharmony_ci	if ((from | to | len) & (PMD_SIZE - 1))
44562306a36Sopenharmony_ci		return -EINVAL;
44662306a36Sopenharmony_ci	if (len == 0 || from + len < from || to + len < to ||
44762306a36Sopenharmony_ci	    from + len - 1 > TASK_SIZE_MAX || to + len - 1 > gmap->asce_end)
44862306a36Sopenharmony_ci		return -EINVAL;
44962306a36Sopenharmony_ci
45062306a36Sopenharmony_ci	flush = 0;
45162306a36Sopenharmony_ci	mmap_write_lock(gmap->mm);
45262306a36Sopenharmony_ci	for (off = 0; off < len; off += PMD_SIZE) {
45362306a36Sopenharmony_ci		/* Remove old translation */
45462306a36Sopenharmony_ci		flush |= __gmap_unmap_by_gaddr(gmap, to + off);
45562306a36Sopenharmony_ci		/* Store new translation */
45662306a36Sopenharmony_ci		if (radix_tree_insert(&gmap->guest_to_host,
45762306a36Sopenharmony_ci				      (to + off) >> PMD_SHIFT,
45862306a36Sopenharmony_ci				      (void *) from + off))
45962306a36Sopenharmony_ci			break;
46062306a36Sopenharmony_ci	}
46162306a36Sopenharmony_ci	mmap_write_unlock(gmap->mm);
46262306a36Sopenharmony_ci	if (flush)
46362306a36Sopenharmony_ci		gmap_flush_tlb(gmap);
46462306a36Sopenharmony_ci	if (off >= len)
46562306a36Sopenharmony_ci		return 0;
46662306a36Sopenharmony_ci	gmap_unmap_segment(gmap, to, len);
46762306a36Sopenharmony_ci	return -ENOMEM;
46862306a36Sopenharmony_ci}
46962306a36Sopenharmony_ciEXPORT_SYMBOL_GPL(gmap_map_segment);
47062306a36Sopenharmony_ci
47162306a36Sopenharmony_ci/**
47262306a36Sopenharmony_ci * __gmap_translate - translate a guest address to a user space address
47362306a36Sopenharmony_ci * @gmap: pointer to guest mapping meta data structure
47462306a36Sopenharmony_ci * @gaddr: guest address
47562306a36Sopenharmony_ci *
47662306a36Sopenharmony_ci * Returns user space address which corresponds to the guest address or
47762306a36Sopenharmony_ci * -EFAULT if no such mapping exists.
47862306a36Sopenharmony_ci * This function does not establish potentially missing page table entries.
47962306a36Sopenharmony_ci * The mmap_lock of the mm that belongs to the address space must be held
48062306a36Sopenharmony_ci * when this function gets called.
48162306a36Sopenharmony_ci *
48262306a36Sopenharmony_ci * Note: Can also be called for shadow gmaps.
48362306a36Sopenharmony_ci */
48462306a36Sopenharmony_ciunsigned long __gmap_translate(struct gmap *gmap, unsigned long gaddr)
48562306a36Sopenharmony_ci{
48662306a36Sopenharmony_ci	unsigned long vmaddr;
48762306a36Sopenharmony_ci
48862306a36Sopenharmony_ci	vmaddr = (unsigned long)
48962306a36Sopenharmony_ci		radix_tree_lookup(&gmap->guest_to_host, gaddr >> PMD_SHIFT);
49062306a36Sopenharmony_ci	/* Note: guest_to_host is empty for a shadow gmap */
49162306a36Sopenharmony_ci	return vmaddr ? (vmaddr | (gaddr & ~PMD_MASK)) : -EFAULT;
49262306a36Sopenharmony_ci}
49362306a36Sopenharmony_ciEXPORT_SYMBOL_GPL(__gmap_translate);
49462306a36Sopenharmony_ci
49562306a36Sopenharmony_ci/**
49662306a36Sopenharmony_ci * gmap_translate - translate a guest address to a user space address
49762306a36Sopenharmony_ci * @gmap: pointer to guest mapping meta data structure
49862306a36Sopenharmony_ci * @gaddr: guest address
49962306a36Sopenharmony_ci *
50062306a36Sopenharmony_ci * Returns user space address which corresponds to the guest address or
50162306a36Sopenharmony_ci * -EFAULT if no such mapping exists.
50262306a36Sopenharmony_ci * This function does not establish potentially missing page table entries.
50362306a36Sopenharmony_ci */
50462306a36Sopenharmony_ciunsigned long gmap_translate(struct gmap *gmap, unsigned long gaddr)
50562306a36Sopenharmony_ci{
50662306a36Sopenharmony_ci	unsigned long rc;
50762306a36Sopenharmony_ci
50862306a36Sopenharmony_ci	mmap_read_lock(gmap->mm);
50962306a36Sopenharmony_ci	rc = __gmap_translate(gmap, gaddr);
51062306a36Sopenharmony_ci	mmap_read_unlock(gmap->mm);
51162306a36Sopenharmony_ci	return rc;
51262306a36Sopenharmony_ci}
51362306a36Sopenharmony_ciEXPORT_SYMBOL_GPL(gmap_translate);
51462306a36Sopenharmony_ci
51562306a36Sopenharmony_ci/**
51662306a36Sopenharmony_ci * gmap_unlink - disconnect a page table from the gmap shadow tables
51762306a36Sopenharmony_ci * @mm: pointer to the parent mm_struct
51862306a36Sopenharmony_ci * @table: pointer to the host page table
51962306a36Sopenharmony_ci * @vmaddr: vm address associated with the host page table
52062306a36Sopenharmony_ci */
52162306a36Sopenharmony_civoid gmap_unlink(struct mm_struct *mm, unsigned long *table,
52262306a36Sopenharmony_ci		 unsigned long vmaddr)
52362306a36Sopenharmony_ci{
52462306a36Sopenharmony_ci	struct gmap *gmap;
52562306a36Sopenharmony_ci	int flush;
52662306a36Sopenharmony_ci
52762306a36Sopenharmony_ci	rcu_read_lock();
52862306a36Sopenharmony_ci	list_for_each_entry_rcu(gmap, &mm->context.gmap_list, list) {
52962306a36Sopenharmony_ci		flush = __gmap_unlink_by_vmaddr(gmap, vmaddr);
53062306a36Sopenharmony_ci		if (flush)
53162306a36Sopenharmony_ci			gmap_flush_tlb(gmap);
53262306a36Sopenharmony_ci	}
53362306a36Sopenharmony_ci	rcu_read_unlock();
53462306a36Sopenharmony_ci}
53562306a36Sopenharmony_ci
53662306a36Sopenharmony_cistatic void gmap_pmdp_xchg(struct gmap *gmap, pmd_t *old, pmd_t new,
53762306a36Sopenharmony_ci			   unsigned long gaddr);
53862306a36Sopenharmony_ci
53962306a36Sopenharmony_ci/**
54062306a36Sopenharmony_ci * __gmap_link - set up shadow page tables to connect a host to a guest address
54162306a36Sopenharmony_ci * @gmap: pointer to guest mapping meta data structure
54262306a36Sopenharmony_ci * @gaddr: guest address
54362306a36Sopenharmony_ci * @vmaddr: vm address
54462306a36Sopenharmony_ci *
54562306a36Sopenharmony_ci * Returns 0 on success, -ENOMEM for out of memory conditions, and -EFAULT
54662306a36Sopenharmony_ci * if the vm address is already mapped to a different guest segment.
54762306a36Sopenharmony_ci * The mmap_lock of the mm that belongs to the address space must be held
54862306a36Sopenharmony_ci * when this function gets called.
54962306a36Sopenharmony_ci */
55062306a36Sopenharmony_ciint __gmap_link(struct gmap *gmap, unsigned long gaddr, unsigned long vmaddr)
55162306a36Sopenharmony_ci{
55262306a36Sopenharmony_ci	struct mm_struct *mm;
55362306a36Sopenharmony_ci	unsigned long *table;
55462306a36Sopenharmony_ci	spinlock_t *ptl;
55562306a36Sopenharmony_ci	pgd_t *pgd;
55662306a36Sopenharmony_ci	p4d_t *p4d;
55762306a36Sopenharmony_ci	pud_t *pud;
55862306a36Sopenharmony_ci	pmd_t *pmd;
55962306a36Sopenharmony_ci	u64 unprot;
56062306a36Sopenharmony_ci	int rc;
56162306a36Sopenharmony_ci
56262306a36Sopenharmony_ci	BUG_ON(gmap_is_shadow(gmap));
56362306a36Sopenharmony_ci	/* Create higher level tables in the gmap page table */
56462306a36Sopenharmony_ci	table = gmap->table;
56562306a36Sopenharmony_ci	if ((gmap->asce & _ASCE_TYPE_MASK) >= _ASCE_TYPE_REGION1) {
56662306a36Sopenharmony_ci		table += (gaddr & _REGION1_INDEX) >> _REGION1_SHIFT;
56762306a36Sopenharmony_ci		if ((*table & _REGION_ENTRY_INVALID) &&
56862306a36Sopenharmony_ci		    gmap_alloc_table(gmap, table, _REGION2_ENTRY_EMPTY,
56962306a36Sopenharmony_ci				     gaddr & _REGION1_MASK))
57062306a36Sopenharmony_ci			return -ENOMEM;
57162306a36Sopenharmony_ci		table = __va(*table & _REGION_ENTRY_ORIGIN);
57262306a36Sopenharmony_ci	}
57362306a36Sopenharmony_ci	if ((gmap->asce & _ASCE_TYPE_MASK) >= _ASCE_TYPE_REGION2) {
57462306a36Sopenharmony_ci		table += (gaddr & _REGION2_INDEX) >> _REGION2_SHIFT;
57562306a36Sopenharmony_ci		if ((*table & _REGION_ENTRY_INVALID) &&
57662306a36Sopenharmony_ci		    gmap_alloc_table(gmap, table, _REGION3_ENTRY_EMPTY,
57762306a36Sopenharmony_ci				     gaddr & _REGION2_MASK))
57862306a36Sopenharmony_ci			return -ENOMEM;
57962306a36Sopenharmony_ci		table = __va(*table & _REGION_ENTRY_ORIGIN);
58062306a36Sopenharmony_ci	}
58162306a36Sopenharmony_ci	if ((gmap->asce & _ASCE_TYPE_MASK) >= _ASCE_TYPE_REGION3) {
58262306a36Sopenharmony_ci		table += (gaddr & _REGION3_INDEX) >> _REGION3_SHIFT;
58362306a36Sopenharmony_ci		if ((*table & _REGION_ENTRY_INVALID) &&
58462306a36Sopenharmony_ci		    gmap_alloc_table(gmap, table, _SEGMENT_ENTRY_EMPTY,
58562306a36Sopenharmony_ci				     gaddr & _REGION3_MASK))
58662306a36Sopenharmony_ci			return -ENOMEM;
58762306a36Sopenharmony_ci		table = __va(*table & _REGION_ENTRY_ORIGIN);
58862306a36Sopenharmony_ci	}
58962306a36Sopenharmony_ci	table += (gaddr & _SEGMENT_INDEX) >> _SEGMENT_SHIFT;
59062306a36Sopenharmony_ci	/* Walk the parent mm page table */
59162306a36Sopenharmony_ci	mm = gmap->mm;
59262306a36Sopenharmony_ci	pgd = pgd_offset(mm, vmaddr);
59362306a36Sopenharmony_ci	VM_BUG_ON(pgd_none(*pgd));
59462306a36Sopenharmony_ci	p4d = p4d_offset(pgd, vmaddr);
59562306a36Sopenharmony_ci	VM_BUG_ON(p4d_none(*p4d));
59662306a36Sopenharmony_ci	pud = pud_offset(p4d, vmaddr);
59762306a36Sopenharmony_ci	VM_BUG_ON(pud_none(*pud));
59862306a36Sopenharmony_ci	/* large puds cannot yet be handled */
59962306a36Sopenharmony_ci	if (pud_large(*pud))
60062306a36Sopenharmony_ci		return -EFAULT;
60162306a36Sopenharmony_ci	pmd = pmd_offset(pud, vmaddr);
60262306a36Sopenharmony_ci	VM_BUG_ON(pmd_none(*pmd));
60362306a36Sopenharmony_ci	/* Are we allowed to use huge pages? */
60462306a36Sopenharmony_ci	if (pmd_large(*pmd) && !gmap->mm->context.allow_gmap_hpage_1m)
60562306a36Sopenharmony_ci		return -EFAULT;
60662306a36Sopenharmony_ci	/* Link gmap segment table entry location to page table. */
60762306a36Sopenharmony_ci	rc = radix_tree_preload(GFP_KERNEL_ACCOUNT);
60862306a36Sopenharmony_ci	if (rc)
60962306a36Sopenharmony_ci		return rc;
61062306a36Sopenharmony_ci	ptl = pmd_lock(mm, pmd);
61162306a36Sopenharmony_ci	spin_lock(&gmap->guest_table_lock);
61262306a36Sopenharmony_ci	if (*table == _SEGMENT_ENTRY_EMPTY) {
61362306a36Sopenharmony_ci		rc = radix_tree_insert(&gmap->host_to_guest,
61462306a36Sopenharmony_ci				       vmaddr >> PMD_SHIFT, table);
61562306a36Sopenharmony_ci		if (!rc) {
61662306a36Sopenharmony_ci			if (pmd_large(*pmd)) {
61762306a36Sopenharmony_ci				*table = (pmd_val(*pmd) &
61862306a36Sopenharmony_ci					  _SEGMENT_ENTRY_HARDWARE_BITS_LARGE)
61962306a36Sopenharmony_ci					| _SEGMENT_ENTRY_GMAP_UC;
62062306a36Sopenharmony_ci			} else
62162306a36Sopenharmony_ci				*table = pmd_val(*pmd) &
62262306a36Sopenharmony_ci					_SEGMENT_ENTRY_HARDWARE_BITS;
62362306a36Sopenharmony_ci		}
62462306a36Sopenharmony_ci	} else if (*table & _SEGMENT_ENTRY_PROTECT &&
62562306a36Sopenharmony_ci		   !(pmd_val(*pmd) & _SEGMENT_ENTRY_PROTECT)) {
62662306a36Sopenharmony_ci		unprot = (u64)*table;
62762306a36Sopenharmony_ci		unprot &= ~_SEGMENT_ENTRY_PROTECT;
62862306a36Sopenharmony_ci		unprot |= _SEGMENT_ENTRY_GMAP_UC;
62962306a36Sopenharmony_ci		gmap_pmdp_xchg(gmap, (pmd_t *)table, __pmd(unprot), gaddr);
63062306a36Sopenharmony_ci	}
63162306a36Sopenharmony_ci	spin_unlock(&gmap->guest_table_lock);
63262306a36Sopenharmony_ci	spin_unlock(ptl);
63362306a36Sopenharmony_ci	radix_tree_preload_end();
63462306a36Sopenharmony_ci	return rc;
63562306a36Sopenharmony_ci}
63662306a36Sopenharmony_ci
63762306a36Sopenharmony_ci/**
63862306a36Sopenharmony_ci * gmap_fault - resolve a fault on a guest address
63962306a36Sopenharmony_ci * @gmap: pointer to guest mapping meta data structure
64062306a36Sopenharmony_ci * @gaddr: guest address
64162306a36Sopenharmony_ci * @fault_flags: flags to pass down to handle_mm_fault()
64262306a36Sopenharmony_ci *
64362306a36Sopenharmony_ci * Returns 0 on success, -ENOMEM for out of memory conditions, and -EFAULT
64462306a36Sopenharmony_ci * if the vm address is already mapped to a different guest segment.
64562306a36Sopenharmony_ci */
64662306a36Sopenharmony_ciint gmap_fault(struct gmap *gmap, unsigned long gaddr,
64762306a36Sopenharmony_ci	       unsigned int fault_flags)
64862306a36Sopenharmony_ci{
64962306a36Sopenharmony_ci	unsigned long vmaddr;
65062306a36Sopenharmony_ci	int rc;
65162306a36Sopenharmony_ci	bool unlocked;
65262306a36Sopenharmony_ci
65362306a36Sopenharmony_ci	mmap_read_lock(gmap->mm);
65462306a36Sopenharmony_ci
65562306a36Sopenharmony_ciretry:
65662306a36Sopenharmony_ci	unlocked = false;
65762306a36Sopenharmony_ci	vmaddr = __gmap_translate(gmap, gaddr);
65862306a36Sopenharmony_ci	if (IS_ERR_VALUE(vmaddr)) {
65962306a36Sopenharmony_ci		rc = vmaddr;
66062306a36Sopenharmony_ci		goto out_up;
66162306a36Sopenharmony_ci	}
66262306a36Sopenharmony_ci	if (fixup_user_fault(gmap->mm, vmaddr, fault_flags,
66362306a36Sopenharmony_ci			     &unlocked)) {
66462306a36Sopenharmony_ci		rc = -EFAULT;
66562306a36Sopenharmony_ci		goto out_up;
66662306a36Sopenharmony_ci	}
66762306a36Sopenharmony_ci	/*
66862306a36Sopenharmony_ci	 * In the case that fixup_user_fault unlocked the mmap_lock during
66962306a36Sopenharmony_ci	 * faultin redo __gmap_translate to not race with a map/unmap_segment.
67062306a36Sopenharmony_ci	 */
67162306a36Sopenharmony_ci	if (unlocked)
67262306a36Sopenharmony_ci		goto retry;
67362306a36Sopenharmony_ci
67462306a36Sopenharmony_ci	rc = __gmap_link(gmap, gaddr, vmaddr);
67562306a36Sopenharmony_ciout_up:
67662306a36Sopenharmony_ci	mmap_read_unlock(gmap->mm);
67762306a36Sopenharmony_ci	return rc;
67862306a36Sopenharmony_ci}
67962306a36Sopenharmony_ciEXPORT_SYMBOL_GPL(gmap_fault);
68062306a36Sopenharmony_ci
68162306a36Sopenharmony_ci/*
68262306a36Sopenharmony_ci * this function is assumed to be called with mmap_lock held
68362306a36Sopenharmony_ci */
68462306a36Sopenharmony_civoid __gmap_zap(struct gmap *gmap, unsigned long gaddr)
68562306a36Sopenharmony_ci{
68662306a36Sopenharmony_ci	struct vm_area_struct *vma;
68762306a36Sopenharmony_ci	unsigned long vmaddr;
68862306a36Sopenharmony_ci	spinlock_t *ptl;
68962306a36Sopenharmony_ci	pte_t *ptep;
69062306a36Sopenharmony_ci
69162306a36Sopenharmony_ci	/* Find the vm address for the guest address */
69262306a36Sopenharmony_ci	vmaddr = (unsigned long) radix_tree_lookup(&gmap->guest_to_host,
69362306a36Sopenharmony_ci						   gaddr >> PMD_SHIFT);
69462306a36Sopenharmony_ci	if (vmaddr) {
69562306a36Sopenharmony_ci		vmaddr |= gaddr & ~PMD_MASK;
69662306a36Sopenharmony_ci
69762306a36Sopenharmony_ci		vma = vma_lookup(gmap->mm, vmaddr);
69862306a36Sopenharmony_ci		if (!vma || is_vm_hugetlb_page(vma))
69962306a36Sopenharmony_ci			return;
70062306a36Sopenharmony_ci
70162306a36Sopenharmony_ci		/* Get pointer to the page table entry */
70262306a36Sopenharmony_ci		ptep = get_locked_pte(gmap->mm, vmaddr, &ptl);
70362306a36Sopenharmony_ci		if (likely(ptep)) {
70462306a36Sopenharmony_ci			ptep_zap_unused(gmap->mm, vmaddr, ptep, 0);
70562306a36Sopenharmony_ci			pte_unmap_unlock(ptep, ptl);
70662306a36Sopenharmony_ci		}
70762306a36Sopenharmony_ci	}
70862306a36Sopenharmony_ci}
70962306a36Sopenharmony_ciEXPORT_SYMBOL_GPL(__gmap_zap);
71062306a36Sopenharmony_ci
71162306a36Sopenharmony_civoid gmap_discard(struct gmap *gmap, unsigned long from, unsigned long to)
71262306a36Sopenharmony_ci{
71362306a36Sopenharmony_ci	unsigned long gaddr, vmaddr, size;
71462306a36Sopenharmony_ci	struct vm_area_struct *vma;
71562306a36Sopenharmony_ci
71662306a36Sopenharmony_ci	mmap_read_lock(gmap->mm);
71762306a36Sopenharmony_ci	for (gaddr = from; gaddr < to;
71862306a36Sopenharmony_ci	     gaddr = (gaddr + PMD_SIZE) & PMD_MASK) {
71962306a36Sopenharmony_ci		/* Find the vm address for the guest address */
72062306a36Sopenharmony_ci		vmaddr = (unsigned long)
72162306a36Sopenharmony_ci			radix_tree_lookup(&gmap->guest_to_host,
72262306a36Sopenharmony_ci					  gaddr >> PMD_SHIFT);
72362306a36Sopenharmony_ci		if (!vmaddr)
72462306a36Sopenharmony_ci			continue;
72562306a36Sopenharmony_ci		vmaddr |= gaddr & ~PMD_MASK;
72662306a36Sopenharmony_ci		/* Find vma in the parent mm */
72762306a36Sopenharmony_ci		vma = find_vma(gmap->mm, vmaddr);
72862306a36Sopenharmony_ci		if (!vma)
72962306a36Sopenharmony_ci			continue;
73062306a36Sopenharmony_ci		/*
73162306a36Sopenharmony_ci		 * We do not discard pages that are backed by
73262306a36Sopenharmony_ci		 * hugetlbfs, so we don't have to refault them.
73362306a36Sopenharmony_ci		 */
73462306a36Sopenharmony_ci		if (is_vm_hugetlb_page(vma))
73562306a36Sopenharmony_ci			continue;
73662306a36Sopenharmony_ci		size = min(to - gaddr, PMD_SIZE - (gaddr & ~PMD_MASK));
73762306a36Sopenharmony_ci		zap_page_range_single(vma, vmaddr, size, NULL);
73862306a36Sopenharmony_ci	}
73962306a36Sopenharmony_ci	mmap_read_unlock(gmap->mm);
74062306a36Sopenharmony_ci}
74162306a36Sopenharmony_ciEXPORT_SYMBOL_GPL(gmap_discard);
74262306a36Sopenharmony_ci
/* Taken around every addition to and removal from gmap_notifier_list. */
static DEFINE_SPINLOCK(gmap_notifier_lock);
/* Registered gmap notifier blocks; walked by gmap_call_notifier(). */
static LIST_HEAD(gmap_notifier_list);
74562306a36Sopenharmony_ci
74662306a36Sopenharmony_ci/**
74762306a36Sopenharmony_ci * gmap_register_pte_notifier - register a pte invalidation callback
74862306a36Sopenharmony_ci * @nb: pointer to the gmap notifier block
74962306a36Sopenharmony_ci */
75062306a36Sopenharmony_civoid gmap_register_pte_notifier(struct gmap_notifier *nb)
75162306a36Sopenharmony_ci{
75262306a36Sopenharmony_ci	spin_lock(&gmap_notifier_lock);
75362306a36Sopenharmony_ci	list_add_rcu(&nb->list, &gmap_notifier_list);
75462306a36Sopenharmony_ci	spin_unlock(&gmap_notifier_lock);
75562306a36Sopenharmony_ci}
75662306a36Sopenharmony_ciEXPORT_SYMBOL_GPL(gmap_register_pte_notifier);
75762306a36Sopenharmony_ci
75862306a36Sopenharmony_ci/**
75962306a36Sopenharmony_ci * gmap_unregister_pte_notifier - remove a pte invalidation callback
76062306a36Sopenharmony_ci * @nb: pointer to the gmap notifier block
76162306a36Sopenharmony_ci */
76262306a36Sopenharmony_civoid gmap_unregister_pte_notifier(struct gmap_notifier *nb)
76362306a36Sopenharmony_ci{
76462306a36Sopenharmony_ci	spin_lock(&gmap_notifier_lock);
76562306a36Sopenharmony_ci	list_del_rcu(&nb->list);
76662306a36Sopenharmony_ci	spin_unlock(&gmap_notifier_lock);
76762306a36Sopenharmony_ci	synchronize_rcu();
76862306a36Sopenharmony_ci}
76962306a36Sopenharmony_ciEXPORT_SYMBOL_GPL(gmap_unregister_pte_notifier);
77062306a36Sopenharmony_ci
77162306a36Sopenharmony_ci/**
77262306a36Sopenharmony_ci * gmap_call_notifier - call all registered invalidation callbacks
77362306a36Sopenharmony_ci * @gmap: pointer to guest mapping meta data structure
77462306a36Sopenharmony_ci * @start: start virtual address in the guest address space
77562306a36Sopenharmony_ci * @end: end virtual address in the guest address space
77662306a36Sopenharmony_ci */
77762306a36Sopenharmony_cistatic void gmap_call_notifier(struct gmap *gmap, unsigned long start,
77862306a36Sopenharmony_ci			       unsigned long end)
77962306a36Sopenharmony_ci{
78062306a36Sopenharmony_ci	struct gmap_notifier *nb;
78162306a36Sopenharmony_ci
78262306a36Sopenharmony_ci	list_for_each_entry(nb, &gmap_notifier_list, list)
78362306a36Sopenharmony_ci		nb->notifier_call(gmap, start, end);
78462306a36Sopenharmony_ci}
78562306a36Sopenharmony_ci
78662306a36Sopenharmony_ci/**
78762306a36Sopenharmony_ci * gmap_table_walk - walk the gmap page tables
78862306a36Sopenharmony_ci * @gmap: pointer to guest mapping meta data structure
78962306a36Sopenharmony_ci * @gaddr: virtual address in the guest address space
79062306a36Sopenharmony_ci * @level: page table level to stop at
79162306a36Sopenharmony_ci *
79262306a36Sopenharmony_ci * Returns a table entry pointer for the given guest address and @level
79362306a36Sopenharmony_ci * @level=0 : returns a pointer to a page table table entry (or NULL)
79462306a36Sopenharmony_ci * @level=1 : returns a pointer to a segment table entry (or NULL)
79562306a36Sopenharmony_ci * @level=2 : returns a pointer to a region-3 table entry (or NULL)
79662306a36Sopenharmony_ci * @level=3 : returns a pointer to a region-2 table entry (or NULL)
79762306a36Sopenharmony_ci * @level=4 : returns a pointer to a region-1 table entry (or NULL)
79862306a36Sopenharmony_ci *
79962306a36Sopenharmony_ci * Returns NULL if the gmap page tables could not be walked to the
80062306a36Sopenharmony_ci * requested level.
80162306a36Sopenharmony_ci *
80262306a36Sopenharmony_ci * Note: Can also be called for shadow gmaps.
80362306a36Sopenharmony_ci */
80462306a36Sopenharmony_cistatic inline unsigned long *gmap_table_walk(struct gmap *gmap,
80562306a36Sopenharmony_ci					     unsigned long gaddr, int level)
80662306a36Sopenharmony_ci{
80762306a36Sopenharmony_ci	const int asce_type = gmap->asce & _ASCE_TYPE_MASK;
80862306a36Sopenharmony_ci	unsigned long *table = gmap->table;
80962306a36Sopenharmony_ci
81062306a36Sopenharmony_ci	if (gmap_is_shadow(gmap) && gmap->removed)
81162306a36Sopenharmony_ci		return NULL;
81262306a36Sopenharmony_ci
81362306a36Sopenharmony_ci	if (WARN_ON_ONCE(level > (asce_type >> 2) + 1))
81462306a36Sopenharmony_ci		return NULL;
81562306a36Sopenharmony_ci
81662306a36Sopenharmony_ci	if (asce_type != _ASCE_TYPE_REGION1 &&
81762306a36Sopenharmony_ci	    gaddr & (-1UL << (31 + (asce_type >> 2) * 11)))
81862306a36Sopenharmony_ci		return NULL;
81962306a36Sopenharmony_ci
82062306a36Sopenharmony_ci	switch (asce_type) {
82162306a36Sopenharmony_ci	case _ASCE_TYPE_REGION1:
82262306a36Sopenharmony_ci		table += (gaddr & _REGION1_INDEX) >> _REGION1_SHIFT;
82362306a36Sopenharmony_ci		if (level == 4)
82462306a36Sopenharmony_ci			break;
82562306a36Sopenharmony_ci		if (*table & _REGION_ENTRY_INVALID)
82662306a36Sopenharmony_ci			return NULL;
82762306a36Sopenharmony_ci		table = __va(*table & _REGION_ENTRY_ORIGIN);
82862306a36Sopenharmony_ci		fallthrough;
82962306a36Sopenharmony_ci	case _ASCE_TYPE_REGION2:
83062306a36Sopenharmony_ci		table += (gaddr & _REGION2_INDEX) >> _REGION2_SHIFT;
83162306a36Sopenharmony_ci		if (level == 3)
83262306a36Sopenharmony_ci			break;
83362306a36Sopenharmony_ci		if (*table & _REGION_ENTRY_INVALID)
83462306a36Sopenharmony_ci			return NULL;
83562306a36Sopenharmony_ci		table = __va(*table & _REGION_ENTRY_ORIGIN);
83662306a36Sopenharmony_ci		fallthrough;
83762306a36Sopenharmony_ci	case _ASCE_TYPE_REGION3:
83862306a36Sopenharmony_ci		table += (gaddr & _REGION3_INDEX) >> _REGION3_SHIFT;
83962306a36Sopenharmony_ci		if (level == 2)
84062306a36Sopenharmony_ci			break;
84162306a36Sopenharmony_ci		if (*table & _REGION_ENTRY_INVALID)
84262306a36Sopenharmony_ci			return NULL;
84362306a36Sopenharmony_ci		table = __va(*table & _REGION_ENTRY_ORIGIN);
84462306a36Sopenharmony_ci		fallthrough;
84562306a36Sopenharmony_ci	case _ASCE_TYPE_SEGMENT:
84662306a36Sopenharmony_ci		table += (gaddr & _SEGMENT_INDEX) >> _SEGMENT_SHIFT;
84762306a36Sopenharmony_ci		if (level == 1)
84862306a36Sopenharmony_ci			break;
84962306a36Sopenharmony_ci		if (*table & _REGION_ENTRY_INVALID)
85062306a36Sopenharmony_ci			return NULL;
85162306a36Sopenharmony_ci		table = __va(*table & _SEGMENT_ENTRY_ORIGIN);
85262306a36Sopenharmony_ci		table += (gaddr & _PAGE_INDEX) >> _PAGE_SHIFT;
85362306a36Sopenharmony_ci	}
85462306a36Sopenharmony_ci	return table;
85562306a36Sopenharmony_ci}
85662306a36Sopenharmony_ci
85762306a36Sopenharmony_ci/**
85862306a36Sopenharmony_ci * gmap_pte_op_walk - walk the gmap page table, get the page table lock
85962306a36Sopenharmony_ci *		      and return the pte pointer
86062306a36Sopenharmony_ci * @gmap: pointer to guest mapping meta data structure
86162306a36Sopenharmony_ci * @gaddr: virtual address in the guest address space
86262306a36Sopenharmony_ci * @ptl: pointer to the spinlock pointer
86362306a36Sopenharmony_ci *
86462306a36Sopenharmony_ci * Returns a pointer to the locked pte for a guest address, or NULL
86562306a36Sopenharmony_ci */
86662306a36Sopenharmony_cistatic pte_t *gmap_pte_op_walk(struct gmap *gmap, unsigned long gaddr,
86762306a36Sopenharmony_ci			       spinlock_t **ptl)
86862306a36Sopenharmony_ci{
86962306a36Sopenharmony_ci	unsigned long *table;
87062306a36Sopenharmony_ci
87162306a36Sopenharmony_ci	BUG_ON(gmap_is_shadow(gmap));
87262306a36Sopenharmony_ci	/* Walk the gmap page table, lock and get pte pointer */
87362306a36Sopenharmony_ci	table = gmap_table_walk(gmap, gaddr, 1); /* get segment pointer */
87462306a36Sopenharmony_ci	if (!table || *table & _SEGMENT_ENTRY_INVALID)
87562306a36Sopenharmony_ci		return NULL;
87662306a36Sopenharmony_ci	return pte_alloc_map_lock(gmap->mm, (pmd_t *) table, gaddr, ptl);
87762306a36Sopenharmony_ci}
87862306a36Sopenharmony_ci
87962306a36Sopenharmony_ci/**
88062306a36Sopenharmony_ci * gmap_pte_op_fixup - force a page in and connect the gmap page table
88162306a36Sopenharmony_ci * @gmap: pointer to guest mapping meta data structure
88262306a36Sopenharmony_ci * @gaddr: virtual address in the guest address space
88362306a36Sopenharmony_ci * @vmaddr: address in the host process address space
88462306a36Sopenharmony_ci * @prot: indicates access rights: PROT_NONE, PROT_READ or PROT_WRITE
88562306a36Sopenharmony_ci *
88662306a36Sopenharmony_ci * Returns 0 if the caller can retry __gmap_translate (might fail again),
88762306a36Sopenharmony_ci * -ENOMEM if out of memory and -EFAULT if anything goes wrong while fixing
88862306a36Sopenharmony_ci * up or connecting the gmap page table.
88962306a36Sopenharmony_ci */
89062306a36Sopenharmony_cistatic int gmap_pte_op_fixup(struct gmap *gmap, unsigned long gaddr,
89162306a36Sopenharmony_ci			     unsigned long vmaddr, int prot)
89262306a36Sopenharmony_ci{
89362306a36Sopenharmony_ci	struct mm_struct *mm = gmap->mm;
89462306a36Sopenharmony_ci	unsigned int fault_flags;
89562306a36Sopenharmony_ci	bool unlocked = false;
89662306a36Sopenharmony_ci
89762306a36Sopenharmony_ci	BUG_ON(gmap_is_shadow(gmap));
89862306a36Sopenharmony_ci	fault_flags = (prot == PROT_WRITE) ? FAULT_FLAG_WRITE : 0;
89962306a36Sopenharmony_ci	if (fixup_user_fault(mm, vmaddr, fault_flags, &unlocked))
90062306a36Sopenharmony_ci		return -EFAULT;
90162306a36Sopenharmony_ci	if (unlocked)
90262306a36Sopenharmony_ci		/* lost mmap_lock, caller has to retry __gmap_translate */
90362306a36Sopenharmony_ci		return 0;
90462306a36Sopenharmony_ci	/* Connect the page tables */
90562306a36Sopenharmony_ci	return __gmap_link(gmap, gaddr, vmaddr);
90662306a36Sopenharmony_ci}
90762306a36Sopenharmony_ci
90862306a36Sopenharmony_ci/**
90962306a36Sopenharmony_ci * gmap_pte_op_end - release the page table lock
91062306a36Sopenharmony_ci * @ptep: pointer to the locked pte
91162306a36Sopenharmony_ci * @ptl: pointer to the page table spinlock
91262306a36Sopenharmony_ci */
91362306a36Sopenharmony_cistatic void gmap_pte_op_end(pte_t *ptep, spinlock_t *ptl)
91462306a36Sopenharmony_ci{
91562306a36Sopenharmony_ci	pte_unmap_unlock(ptep, ptl);
91662306a36Sopenharmony_ci}
91762306a36Sopenharmony_ci
91862306a36Sopenharmony_ci/**
91962306a36Sopenharmony_ci * gmap_pmd_op_walk - walk the gmap tables, get the guest table lock
92062306a36Sopenharmony_ci *		      and return the pmd pointer
92162306a36Sopenharmony_ci * @gmap: pointer to guest mapping meta data structure
92262306a36Sopenharmony_ci * @gaddr: virtual address in the guest address space
92362306a36Sopenharmony_ci *
92462306a36Sopenharmony_ci * Returns a pointer to the pmd for a guest address, or NULL
92562306a36Sopenharmony_ci */
92662306a36Sopenharmony_cistatic inline pmd_t *gmap_pmd_op_walk(struct gmap *gmap, unsigned long gaddr)
92762306a36Sopenharmony_ci{
92862306a36Sopenharmony_ci	pmd_t *pmdp;
92962306a36Sopenharmony_ci
93062306a36Sopenharmony_ci	BUG_ON(gmap_is_shadow(gmap));
93162306a36Sopenharmony_ci	pmdp = (pmd_t *) gmap_table_walk(gmap, gaddr, 1);
93262306a36Sopenharmony_ci	if (!pmdp)
93362306a36Sopenharmony_ci		return NULL;
93462306a36Sopenharmony_ci
93562306a36Sopenharmony_ci	/* without huge pages, there is no need to take the table lock */
93662306a36Sopenharmony_ci	if (!gmap->mm->context.allow_gmap_hpage_1m)
93762306a36Sopenharmony_ci		return pmd_none(*pmdp) ? NULL : pmdp;
93862306a36Sopenharmony_ci
93962306a36Sopenharmony_ci	spin_lock(&gmap->guest_table_lock);
94062306a36Sopenharmony_ci	if (pmd_none(*pmdp)) {
94162306a36Sopenharmony_ci		spin_unlock(&gmap->guest_table_lock);
94262306a36Sopenharmony_ci		return NULL;
94362306a36Sopenharmony_ci	}
94462306a36Sopenharmony_ci
94562306a36Sopenharmony_ci	/* 4k page table entries are locked via the pte (pte_alloc_map_lock). */
94662306a36Sopenharmony_ci	if (!pmd_large(*pmdp))
94762306a36Sopenharmony_ci		spin_unlock(&gmap->guest_table_lock);
94862306a36Sopenharmony_ci	return pmdp;
94962306a36Sopenharmony_ci}
95062306a36Sopenharmony_ci
95162306a36Sopenharmony_ci/**
95262306a36Sopenharmony_ci * gmap_pmd_op_end - release the guest_table_lock if needed
95362306a36Sopenharmony_ci * @gmap: pointer to the guest mapping meta data structure
95462306a36Sopenharmony_ci * @pmdp: pointer to the pmd
95562306a36Sopenharmony_ci */
95662306a36Sopenharmony_cistatic inline void gmap_pmd_op_end(struct gmap *gmap, pmd_t *pmdp)
95762306a36Sopenharmony_ci{
95862306a36Sopenharmony_ci	if (pmd_large(*pmdp))
95962306a36Sopenharmony_ci		spin_unlock(&gmap->guest_table_lock);
96062306a36Sopenharmony_ci}
96162306a36Sopenharmony_ci
96262306a36Sopenharmony_ci/*
96362306a36Sopenharmony_ci * gmap_protect_pmd - remove access rights to memory and set pmd notification bits
96462306a36Sopenharmony_ci * @pmdp: pointer to the pmd to be protected
96562306a36Sopenharmony_ci * @prot: indicates access rights: PROT_NONE, PROT_READ or PROT_WRITE
96662306a36Sopenharmony_ci * @bits: notification bits to set
96762306a36Sopenharmony_ci *
96862306a36Sopenharmony_ci * Returns:
96962306a36Sopenharmony_ci * 0 if successfully protected
97062306a36Sopenharmony_ci * -EAGAIN if a fixup is needed
97162306a36Sopenharmony_ci * -EINVAL if unsupported notifier bits have been specified
97262306a36Sopenharmony_ci *
97362306a36Sopenharmony_ci * Expected to be called with sg->mm->mmap_lock in read and
97462306a36Sopenharmony_ci * guest_table_lock held.
97562306a36Sopenharmony_ci */
97662306a36Sopenharmony_cistatic int gmap_protect_pmd(struct gmap *gmap, unsigned long gaddr,
97762306a36Sopenharmony_ci			    pmd_t *pmdp, int prot, unsigned long bits)
97862306a36Sopenharmony_ci{
97962306a36Sopenharmony_ci	int pmd_i = pmd_val(*pmdp) & _SEGMENT_ENTRY_INVALID;
98062306a36Sopenharmony_ci	int pmd_p = pmd_val(*pmdp) & _SEGMENT_ENTRY_PROTECT;
98162306a36Sopenharmony_ci	pmd_t new = *pmdp;
98262306a36Sopenharmony_ci
98362306a36Sopenharmony_ci	/* Fixup needed */
98462306a36Sopenharmony_ci	if ((pmd_i && (prot != PROT_NONE)) || (pmd_p && (prot == PROT_WRITE)))
98562306a36Sopenharmony_ci		return -EAGAIN;
98662306a36Sopenharmony_ci
98762306a36Sopenharmony_ci	if (prot == PROT_NONE && !pmd_i) {
98862306a36Sopenharmony_ci		new = set_pmd_bit(new, __pgprot(_SEGMENT_ENTRY_INVALID));
98962306a36Sopenharmony_ci		gmap_pmdp_xchg(gmap, pmdp, new, gaddr);
99062306a36Sopenharmony_ci	}
99162306a36Sopenharmony_ci
99262306a36Sopenharmony_ci	if (prot == PROT_READ && !pmd_p) {
99362306a36Sopenharmony_ci		new = clear_pmd_bit(new, __pgprot(_SEGMENT_ENTRY_INVALID));
99462306a36Sopenharmony_ci		new = set_pmd_bit(new, __pgprot(_SEGMENT_ENTRY_PROTECT));
99562306a36Sopenharmony_ci		gmap_pmdp_xchg(gmap, pmdp, new, gaddr);
99662306a36Sopenharmony_ci	}
99762306a36Sopenharmony_ci
99862306a36Sopenharmony_ci	if (bits & GMAP_NOTIFY_MPROT)
99962306a36Sopenharmony_ci		set_pmd(pmdp, set_pmd_bit(*pmdp, __pgprot(_SEGMENT_ENTRY_GMAP_IN)));
100062306a36Sopenharmony_ci
100162306a36Sopenharmony_ci	/* Shadow GMAP protection needs split PMDs */
100262306a36Sopenharmony_ci	if (bits & GMAP_NOTIFY_SHADOW)
100362306a36Sopenharmony_ci		return -EINVAL;
100462306a36Sopenharmony_ci
100562306a36Sopenharmony_ci	return 0;
100662306a36Sopenharmony_ci}
100762306a36Sopenharmony_ci
100862306a36Sopenharmony_ci/*
100962306a36Sopenharmony_ci * gmap_protect_pte - remove access rights to memory and set pgste bits
101062306a36Sopenharmony_ci * @gmap: pointer to guest mapping meta data structure
101162306a36Sopenharmony_ci * @gaddr: virtual address in the guest address space
101262306a36Sopenharmony_ci * @pmdp: pointer to the pmd associated with the pte
101362306a36Sopenharmony_ci * @prot: indicates access rights: PROT_NONE, PROT_READ or PROT_WRITE
101462306a36Sopenharmony_ci * @bits: notification bits to set
101562306a36Sopenharmony_ci *
101662306a36Sopenharmony_ci * Returns 0 if successfully protected, -ENOMEM if out of memory and
101762306a36Sopenharmony_ci * -EAGAIN if a fixup is needed.
101862306a36Sopenharmony_ci *
101962306a36Sopenharmony_ci * Expected to be called with sg->mm->mmap_lock in read
102062306a36Sopenharmony_ci */
102162306a36Sopenharmony_cistatic int gmap_protect_pte(struct gmap *gmap, unsigned long gaddr,
102262306a36Sopenharmony_ci			    pmd_t *pmdp, int prot, unsigned long bits)
102362306a36Sopenharmony_ci{
102462306a36Sopenharmony_ci	int rc;
102562306a36Sopenharmony_ci	pte_t *ptep;
102662306a36Sopenharmony_ci	spinlock_t *ptl;
102762306a36Sopenharmony_ci	unsigned long pbits = 0;
102862306a36Sopenharmony_ci
102962306a36Sopenharmony_ci	if (pmd_val(*pmdp) & _SEGMENT_ENTRY_INVALID)
103062306a36Sopenharmony_ci		return -EAGAIN;
103162306a36Sopenharmony_ci
103262306a36Sopenharmony_ci	ptep = pte_alloc_map_lock(gmap->mm, pmdp, gaddr, &ptl);
103362306a36Sopenharmony_ci	if (!ptep)
103462306a36Sopenharmony_ci		return -ENOMEM;
103562306a36Sopenharmony_ci
103662306a36Sopenharmony_ci	pbits |= (bits & GMAP_NOTIFY_MPROT) ? PGSTE_IN_BIT : 0;
103762306a36Sopenharmony_ci	pbits |= (bits & GMAP_NOTIFY_SHADOW) ? PGSTE_VSIE_BIT : 0;
103862306a36Sopenharmony_ci	/* Protect and unlock. */
103962306a36Sopenharmony_ci	rc = ptep_force_prot(gmap->mm, gaddr, ptep, prot, pbits);
104062306a36Sopenharmony_ci	gmap_pte_op_end(ptep, ptl);
104162306a36Sopenharmony_ci	return rc;
104262306a36Sopenharmony_ci}
104362306a36Sopenharmony_ci
104462306a36Sopenharmony_ci/*
104562306a36Sopenharmony_ci * gmap_protect_range - remove access rights to memory and set pgste bits
104662306a36Sopenharmony_ci * @gmap: pointer to guest mapping meta data structure
104762306a36Sopenharmony_ci * @gaddr: virtual address in the guest address space
104862306a36Sopenharmony_ci * @len: size of area
104962306a36Sopenharmony_ci * @prot: indicates access rights: PROT_NONE, PROT_READ or PROT_WRITE
105062306a36Sopenharmony_ci * @bits: pgste notification bits to set
105162306a36Sopenharmony_ci *
105262306a36Sopenharmony_ci * Returns 0 if successfully protected, -ENOMEM if out of memory and
105362306a36Sopenharmony_ci * -EFAULT if gaddr is invalid (or mapping for shadows is missing).
105462306a36Sopenharmony_ci *
105562306a36Sopenharmony_ci * Called with sg->mm->mmap_lock in read.
105662306a36Sopenharmony_ci */
105762306a36Sopenharmony_cistatic int gmap_protect_range(struct gmap *gmap, unsigned long gaddr,
105862306a36Sopenharmony_ci			      unsigned long len, int prot, unsigned long bits)
105962306a36Sopenharmony_ci{
106062306a36Sopenharmony_ci	unsigned long vmaddr, dist;
106162306a36Sopenharmony_ci	pmd_t *pmdp;
106262306a36Sopenharmony_ci	int rc;
106362306a36Sopenharmony_ci
106462306a36Sopenharmony_ci	BUG_ON(gmap_is_shadow(gmap));
106562306a36Sopenharmony_ci	while (len) {
106662306a36Sopenharmony_ci		rc = -EAGAIN;
106762306a36Sopenharmony_ci		pmdp = gmap_pmd_op_walk(gmap, gaddr);
106862306a36Sopenharmony_ci		if (pmdp) {
106962306a36Sopenharmony_ci			if (!pmd_large(*pmdp)) {
107062306a36Sopenharmony_ci				rc = gmap_protect_pte(gmap, gaddr, pmdp, prot,
107162306a36Sopenharmony_ci						      bits);
107262306a36Sopenharmony_ci				if (!rc) {
107362306a36Sopenharmony_ci					len -= PAGE_SIZE;
107462306a36Sopenharmony_ci					gaddr += PAGE_SIZE;
107562306a36Sopenharmony_ci				}
107662306a36Sopenharmony_ci			} else {
107762306a36Sopenharmony_ci				rc = gmap_protect_pmd(gmap, gaddr, pmdp, prot,
107862306a36Sopenharmony_ci						      bits);
107962306a36Sopenharmony_ci				if (!rc) {
108062306a36Sopenharmony_ci					dist = HPAGE_SIZE - (gaddr & ~HPAGE_MASK);
108162306a36Sopenharmony_ci					len = len < dist ? 0 : len - dist;
108262306a36Sopenharmony_ci					gaddr = (gaddr & HPAGE_MASK) + HPAGE_SIZE;
108362306a36Sopenharmony_ci				}
108462306a36Sopenharmony_ci			}
108562306a36Sopenharmony_ci			gmap_pmd_op_end(gmap, pmdp);
108662306a36Sopenharmony_ci		}
108762306a36Sopenharmony_ci		if (rc) {
108862306a36Sopenharmony_ci			if (rc == -EINVAL)
108962306a36Sopenharmony_ci				return rc;
109062306a36Sopenharmony_ci
109162306a36Sopenharmony_ci			/* -EAGAIN, fixup of userspace mm and gmap */
109262306a36Sopenharmony_ci			vmaddr = __gmap_translate(gmap, gaddr);
109362306a36Sopenharmony_ci			if (IS_ERR_VALUE(vmaddr))
109462306a36Sopenharmony_ci				return vmaddr;
109562306a36Sopenharmony_ci			rc = gmap_pte_op_fixup(gmap, gaddr, vmaddr, prot);
109662306a36Sopenharmony_ci			if (rc)
109762306a36Sopenharmony_ci				return rc;
109862306a36Sopenharmony_ci		}
109962306a36Sopenharmony_ci	}
110062306a36Sopenharmony_ci	return 0;
110162306a36Sopenharmony_ci}
110262306a36Sopenharmony_ci
110362306a36Sopenharmony_ci/**
110462306a36Sopenharmony_ci * gmap_mprotect_notify - change access rights for a range of ptes and
110562306a36Sopenharmony_ci *                        call the notifier if any pte changes again
110662306a36Sopenharmony_ci * @gmap: pointer to guest mapping meta data structure
110762306a36Sopenharmony_ci * @gaddr: virtual address in the guest address space
110862306a36Sopenharmony_ci * @len: size of area
110962306a36Sopenharmony_ci * @prot: indicates access rights: PROT_NONE, PROT_READ or PROT_WRITE
111062306a36Sopenharmony_ci *
111162306a36Sopenharmony_ci * Returns 0 if for each page in the given range a gmap mapping exists,
111262306a36Sopenharmony_ci * the new access rights could be set and the notifier could be armed.
111362306a36Sopenharmony_ci * If the gmap mapping is missing for one or more pages -EFAULT is
111462306a36Sopenharmony_ci * returned. If no memory could be allocated -ENOMEM is returned.
111562306a36Sopenharmony_ci * This function establishes missing page table entries.
111662306a36Sopenharmony_ci */
111762306a36Sopenharmony_ciint gmap_mprotect_notify(struct gmap *gmap, unsigned long gaddr,
111862306a36Sopenharmony_ci			 unsigned long len, int prot)
111962306a36Sopenharmony_ci{
112062306a36Sopenharmony_ci	int rc;
112162306a36Sopenharmony_ci
112262306a36Sopenharmony_ci	if ((gaddr & ~PAGE_MASK) || (len & ~PAGE_MASK) || gmap_is_shadow(gmap))
112362306a36Sopenharmony_ci		return -EINVAL;
112462306a36Sopenharmony_ci	if (!MACHINE_HAS_ESOP && prot == PROT_READ)
112562306a36Sopenharmony_ci		return -EINVAL;
112662306a36Sopenharmony_ci	mmap_read_lock(gmap->mm);
112762306a36Sopenharmony_ci	rc = gmap_protect_range(gmap, gaddr, len, prot, GMAP_NOTIFY_MPROT);
112862306a36Sopenharmony_ci	mmap_read_unlock(gmap->mm);
112962306a36Sopenharmony_ci	return rc;
113062306a36Sopenharmony_ci}
113162306a36Sopenharmony_ciEXPORT_SYMBOL_GPL(gmap_mprotect_notify);
113262306a36Sopenharmony_ci
113362306a36Sopenharmony_ci/**
113462306a36Sopenharmony_ci * gmap_read_table - get an unsigned long value from a guest page table using
113562306a36Sopenharmony_ci *                   absolute addressing, without marking the page referenced.
113662306a36Sopenharmony_ci * @gmap: pointer to guest mapping meta data structure
113762306a36Sopenharmony_ci * @gaddr: virtual address in the guest address space
113862306a36Sopenharmony_ci * @val: pointer to the unsigned long value to return
113962306a36Sopenharmony_ci *
114062306a36Sopenharmony_ci * Returns 0 if the value was read, -ENOMEM if out of memory and -EFAULT
114162306a36Sopenharmony_ci * if reading using the virtual address failed. -EINVAL if called on a gmap
114262306a36Sopenharmony_ci * shadow.
114362306a36Sopenharmony_ci *
114462306a36Sopenharmony_ci * Called with gmap->mm->mmap_lock in read.
114562306a36Sopenharmony_ci */
114662306a36Sopenharmony_ciint gmap_read_table(struct gmap *gmap, unsigned long gaddr, unsigned long *val)
114762306a36Sopenharmony_ci{
114862306a36Sopenharmony_ci	unsigned long address, vmaddr;
114962306a36Sopenharmony_ci	spinlock_t *ptl;
115062306a36Sopenharmony_ci	pte_t *ptep, pte;
115162306a36Sopenharmony_ci	int rc;
115262306a36Sopenharmony_ci
115362306a36Sopenharmony_ci	if (gmap_is_shadow(gmap))
115462306a36Sopenharmony_ci		return -EINVAL;
115562306a36Sopenharmony_ci
115662306a36Sopenharmony_ci	while (1) {
115762306a36Sopenharmony_ci		rc = -EAGAIN;
115862306a36Sopenharmony_ci		ptep = gmap_pte_op_walk(gmap, gaddr, &ptl);
115962306a36Sopenharmony_ci		if (ptep) {
116062306a36Sopenharmony_ci			pte = *ptep;
116162306a36Sopenharmony_ci			if (pte_present(pte) && (pte_val(pte) & _PAGE_READ)) {
116262306a36Sopenharmony_ci				address = pte_val(pte) & PAGE_MASK;
116362306a36Sopenharmony_ci				address += gaddr & ~PAGE_MASK;
116462306a36Sopenharmony_ci				*val = *(unsigned long *)__va(address);
116562306a36Sopenharmony_ci				set_pte(ptep, set_pte_bit(*ptep, __pgprot(_PAGE_YOUNG)));
116662306a36Sopenharmony_ci				/* Do *NOT* clear the _PAGE_INVALID bit! */
116762306a36Sopenharmony_ci				rc = 0;
116862306a36Sopenharmony_ci			}
116962306a36Sopenharmony_ci			gmap_pte_op_end(ptep, ptl);
117062306a36Sopenharmony_ci		}
117162306a36Sopenharmony_ci		if (!rc)
117262306a36Sopenharmony_ci			break;
117362306a36Sopenharmony_ci		vmaddr = __gmap_translate(gmap, gaddr);
117462306a36Sopenharmony_ci		if (IS_ERR_VALUE(vmaddr)) {
117562306a36Sopenharmony_ci			rc = vmaddr;
117662306a36Sopenharmony_ci			break;
117762306a36Sopenharmony_ci		}
117862306a36Sopenharmony_ci		rc = gmap_pte_op_fixup(gmap, gaddr, vmaddr, PROT_READ);
117962306a36Sopenharmony_ci		if (rc)
118062306a36Sopenharmony_ci			break;
118162306a36Sopenharmony_ci	}
118262306a36Sopenharmony_ci	return rc;
118362306a36Sopenharmony_ci}
118462306a36Sopenharmony_ciEXPORT_SYMBOL_GPL(gmap_read_table);
118562306a36Sopenharmony_ci
118662306a36Sopenharmony_ci/**
118762306a36Sopenharmony_ci * gmap_insert_rmap - add a rmap to the host_to_rmap radix tree
118862306a36Sopenharmony_ci * @sg: pointer to the shadow guest address space structure
118962306a36Sopenharmony_ci * @vmaddr: vm address associated with the rmap
119062306a36Sopenharmony_ci * @rmap: pointer to the rmap structure
119162306a36Sopenharmony_ci *
119262306a36Sopenharmony_ci * Called with the sg->guest_table_lock
119362306a36Sopenharmony_ci */
119462306a36Sopenharmony_cistatic inline void gmap_insert_rmap(struct gmap *sg, unsigned long vmaddr,
119562306a36Sopenharmony_ci				    struct gmap_rmap *rmap)
119662306a36Sopenharmony_ci{
119762306a36Sopenharmony_ci	struct gmap_rmap *temp;
119862306a36Sopenharmony_ci	void __rcu **slot;
119962306a36Sopenharmony_ci
120062306a36Sopenharmony_ci	BUG_ON(!gmap_is_shadow(sg));
120162306a36Sopenharmony_ci	slot = radix_tree_lookup_slot(&sg->host_to_rmap, vmaddr >> PAGE_SHIFT);
120262306a36Sopenharmony_ci	if (slot) {
120362306a36Sopenharmony_ci		rmap->next = radix_tree_deref_slot_protected(slot,
120462306a36Sopenharmony_ci							&sg->guest_table_lock);
120562306a36Sopenharmony_ci		for (temp = rmap->next; temp; temp = temp->next) {
120662306a36Sopenharmony_ci			if (temp->raddr == rmap->raddr) {
120762306a36Sopenharmony_ci				kfree(rmap);
120862306a36Sopenharmony_ci				return;
120962306a36Sopenharmony_ci			}
121062306a36Sopenharmony_ci		}
121162306a36Sopenharmony_ci		radix_tree_replace_slot(&sg->host_to_rmap, slot, rmap);
121262306a36Sopenharmony_ci	} else {
121362306a36Sopenharmony_ci		rmap->next = NULL;
121462306a36Sopenharmony_ci		radix_tree_insert(&sg->host_to_rmap, vmaddr >> PAGE_SHIFT,
121562306a36Sopenharmony_ci				  rmap);
121662306a36Sopenharmony_ci	}
121762306a36Sopenharmony_ci}
121862306a36Sopenharmony_ci
121962306a36Sopenharmony_ci/**
122062306a36Sopenharmony_ci * gmap_protect_rmap - restrict access rights to memory (RO) and create an rmap
122162306a36Sopenharmony_ci * @sg: pointer to the shadow guest address space structure
122262306a36Sopenharmony_ci * @raddr: rmap address in the shadow gmap
122362306a36Sopenharmony_ci * @paddr: address in the parent guest address space
122462306a36Sopenharmony_ci * @len: length of the memory area to protect
122562306a36Sopenharmony_ci *
122662306a36Sopenharmony_ci * Returns 0 if successfully protected and the rmap was created, -ENOMEM
122762306a36Sopenharmony_ci * if out of memory and -EFAULT if paddr is invalid.
122862306a36Sopenharmony_ci */
122962306a36Sopenharmony_cistatic int gmap_protect_rmap(struct gmap *sg, unsigned long raddr,
123062306a36Sopenharmony_ci			     unsigned long paddr, unsigned long len)
123162306a36Sopenharmony_ci{
123262306a36Sopenharmony_ci	struct gmap *parent;
123362306a36Sopenharmony_ci	struct gmap_rmap *rmap;
123462306a36Sopenharmony_ci	unsigned long vmaddr;
123562306a36Sopenharmony_ci	spinlock_t *ptl;
123662306a36Sopenharmony_ci	pte_t *ptep;
123762306a36Sopenharmony_ci	int rc;
123862306a36Sopenharmony_ci
123962306a36Sopenharmony_ci	BUG_ON(!gmap_is_shadow(sg));
124062306a36Sopenharmony_ci	parent = sg->parent;
124162306a36Sopenharmony_ci	while (len) {
124262306a36Sopenharmony_ci		vmaddr = __gmap_translate(parent, paddr);
124362306a36Sopenharmony_ci		if (IS_ERR_VALUE(vmaddr))
124462306a36Sopenharmony_ci			return vmaddr;
124562306a36Sopenharmony_ci		rmap = kzalloc(sizeof(*rmap), GFP_KERNEL_ACCOUNT);
124662306a36Sopenharmony_ci		if (!rmap)
124762306a36Sopenharmony_ci			return -ENOMEM;
124862306a36Sopenharmony_ci		rmap->raddr = raddr;
124962306a36Sopenharmony_ci		rc = radix_tree_preload(GFP_KERNEL_ACCOUNT);
125062306a36Sopenharmony_ci		if (rc) {
125162306a36Sopenharmony_ci			kfree(rmap);
125262306a36Sopenharmony_ci			return rc;
125362306a36Sopenharmony_ci		}
125462306a36Sopenharmony_ci		rc = -EAGAIN;
125562306a36Sopenharmony_ci		ptep = gmap_pte_op_walk(parent, paddr, &ptl);
125662306a36Sopenharmony_ci		if (ptep) {
125762306a36Sopenharmony_ci			spin_lock(&sg->guest_table_lock);
125862306a36Sopenharmony_ci			rc = ptep_force_prot(parent->mm, paddr, ptep, PROT_READ,
125962306a36Sopenharmony_ci					     PGSTE_VSIE_BIT);
126062306a36Sopenharmony_ci			if (!rc)
126162306a36Sopenharmony_ci				gmap_insert_rmap(sg, vmaddr, rmap);
126262306a36Sopenharmony_ci			spin_unlock(&sg->guest_table_lock);
126362306a36Sopenharmony_ci			gmap_pte_op_end(ptep, ptl);
126462306a36Sopenharmony_ci		}
126562306a36Sopenharmony_ci		radix_tree_preload_end();
126662306a36Sopenharmony_ci		if (rc) {
126762306a36Sopenharmony_ci			kfree(rmap);
126862306a36Sopenharmony_ci			rc = gmap_pte_op_fixup(parent, paddr, vmaddr, PROT_READ);
126962306a36Sopenharmony_ci			if (rc)
127062306a36Sopenharmony_ci				return rc;
127162306a36Sopenharmony_ci			continue;
127262306a36Sopenharmony_ci		}
127362306a36Sopenharmony_ci		paddr += PAGE_SIZE;
127462306a36Sopenharmony_ci		len -= PAGE_SIZE;
127562306a36Sopenharmony_ci	}
127662306a36Sopenharmony_ci	return 0;
127762306a36Sopenharmony_ci}
127862306a36Sopenharmony_ci
/*
 * Type tags for shadow rmaps, one value per shadow table level. Every tag
 * fits within _SHADOW_RMAP_MASK.
 * NOTE(review): presumably or-ed into the low bits of an rmap's raddr; the
 * users are outside this section, confirm there.
 */
#define _SHADOW_RMAP_PGTABLE	0x1
#define _SHADOW_RMAP_SEGMENT	0x2
#define _SHADOW_RMAP_REGION3	0x3
#define _SHADOW_RMAP_REGION2	0x4
#define _SHADOW_RMAP_REGION1	0x5
#define _SHADOW_RMAP_MASK	0x7
128562306a36Sopenharmony_ci
128662306a36Sopenharmony_ci/**
128762306a36Sopenharmony_ci * gmap_idte_one - invalidate a single region or segment table entry
128862306a36Sopenharmony_ci * @asce: region or segment table *origin* + table-type bits
128962306a36Sopenharmony_ci * @vaddr: virtual address to identify the table entry to flush
129062306a36Sopenharmony_ci *
129162306a36Sopenharmony_ci * The invalid bit of a single region or segment table entry is set
129262306a36Sopenharmony_ci * and the associated TLB entries depending on the entry are flushed.
129362306a36Sopenharmony_ci * The table-type of the @asce identifies the portion of the @vaddr
129462306a36Sopenharmony_ci * that is used as the invalidation index.
129562306a36Sopenharmony_ci */
129662306a36Sopenharmony_cistatic inline void gmap_idte_one(unsigned long asce, unsigned long vaddr)
129762306a36Sopenharmony_ci{
129862306a36Sopenharmony_ci	asm volatile(
129962306a36Sopenharmony_ci		"	idte	%0,0,%1"
130062306a36Sopenharmony_ci		: : "a" (asce), "a" (vaddr) : "cc", "memory");
130162306a36Sopenharmony_ci}
130262306a36Sopenharmony_ci
130362306a36Sopenharmony_ci/**
130462306a36Sopenharmony_ci * gmap_unshadow_page - remove a page from a shadow page table
130562306a36Sopenharmony_ci * @sg: pointer to the shadow guest address space structure
130662306a36Sopenharmony_ci * @raddr: rmap address in the shadow guest address space
130762306a36Sopenharmony_ci *
130862306a36Sopenharmony_ci * Called with the sg->guest_table_lock
130962306a36Sopenharmony_ci */
131062306a36Sopenharmony_cistatic void gmap_unshadow_page(struct gmap *sg, unsigned long raddr)
131162306a36Sopenharmony_ci{
131262306a36Sopenharmony_ci	unsigned long *table;
131362306a36Sopenharmony_ci
131462306a36Sopenharmony_ci	BUG_ON(!gmap_is_shadow(sg));
131562306a36Sopenharmony_ci	table = gmap_table_walk(sg, raddr, 0); /* get page table pointer */
131662306a36Sopenharmony_ci	if (!table || *table & _PAGE_INVALID)
131762306a36Sopenharmony_ci		return;
131862306a36Sopenharmony_ci	gmap_call_notifier(sg, raddr, raddr + _PAGE_SIZE - 1);
131962306a36Sopenharmony_ci	ptep_unshadow_pte(sg->mm, raddr, (pte_t *) table);
132062306a36Sopenharmony_ci}
132162306a36Sopenharmony_ci
132262306a36Sopenharmony_ci/**
132362306a36Sopenharmony_ci * __gmap_unshadow_pgt - remove all entries from a shadow page table
132462306a36Sopenharmony_ci * @sg: pointer to the shadow guest address space structure
132562306a36Sopenharmony_ci * @raddr: rmap address in the shadow guest address space
132662306a36Sopenharmony_ci * @pgt: pointer to the start of a shadow page table
132762306a36Sopenharmony_ci *
132862306a36Sopenharmony_ci * Called with the sg->guest_table_lock
132962306a36Sopenharmony_ci */
133062306a36Sopenharmony_cistatic void __gmap_unshadow_pgt(struct gmap *sg, unsigned long raddr,
133162306a36Sopenharmony_ci				unsigned long *pgt)
133262306a36Sopenharmony_ci{
133362306a36Sopenharmony_ci	int i;
133462306a36Sopenharmony_ci
133562306a36Sopenharmony_ci	BUG_ON(!gmap_is_shadow(sg));
133662306a36Sopenharmony_ci	for (i = 0; i < _PAGE_ENTRIES; i++, raddr += _PAGE_SIZE)
133762306a36Sopenharmony_ci		pgt[i] = _PAGE_INVALID;
133862306a36Sopenharmony_ci}
133962306a36Sopenharmony_ci
134062306a36Sopenharmony_ci/**
134162306a36Sopenharmony_ci * gmap_unshadow_pgt - remove a shadow page table from a segment entry
134262306a36Sopenharmony_ci * @sg: pointer to the shadow guest address space structure
134362306a36Sopenharmony_ci * @raddr: address in the shadow guest address space
134462306a36Sopenharmony_ci *
134562306a36Sopenharmony_ci * Called with the sg->guest_table_lock
134662306a36Sopenharmony_ci */
134762306a36Sopenharmony_cistatic void gmap_unshadow_pgt(struct gmap *sg, unsigned long raddr)
134862306a36Sopenharmony_ci{
134962306a36Sopenharmony_ci	unsigned long *ste;
135062306a36Sopenharmony_ci	phys_addr_t sto, pgt;
135162306a36Sopenharmony_ci	struct page *page;
135262306a36Sopenharmony_ci
135362306a36Sopenharmony_ci	BUG_ON(!gmap_is_shadow(sg));
135462306a36Sopenharmony_ci	ste = gmap_table_walk(sg, raddr, 1); /* get segment pointer */
135562306a36Sopenharmony_ci	if (!ste || !(*ste & _SEGMENT_ENTRY_ORIGIN))
135662306a36Sopenharmony_ci		return;
135762306a36Sopenharmony_ci	gmap_call_notifier(sg, raddr, raddr + _SEGMENT_SIZE - 1);
135862306a36Sopenharmony_ci	sto = __pa(ste - ((raddr & _SEGMENT_INDEX) >> _SEGMENT_SHIFT));
135962306a36Sopenharmony_ci	gmap_idte_one(sto | _ASCE_TYPE_SEGMENT, raddr);
136062306a36Sopenharmony_ci	pgt = *ste & _SEGMENT_ENTRY_ORIGIN;
136162306a36Sopenharmony_ci	*ste = _SEGMENT_ENTRY_EMPTY;
136262306a36Sopenharmony_ci	__gmap_unshadow_pgt(sg, raddr, __va(pgt));
136362306a36Sopenharmony_ci	/* Free page table */
136462306a36Sopenharmony_ci	page = phys_to_page(pgt);
136562306a36Sopenharmony_ci	list_del(&page->lru);
136662306a36Sopenharmony_ci	page_table_free_pgste(page);
136762306a36Sopenharmony_ci}
136862306a36Sopenharmony_ci
136962306a36Sopenharmony_ci/**
137062306a36Sopenharmony_ci * __gmap_unshadow_sgt - remove all entries from a shadow segment table
137162306a36Sopenharmony_ci * @sg: pointer to the shadow guest address space structure
137262306a36Sopenharmony_ci * @raddr: rmap address in the shadow guest address space
137362306a36Sopenharmony_ci * @sgt: pointer to the start of a shadow segment table
137462306a36Sopenharmony_ci *
137562306a36Sopenharmony_ci * Called with the sg->guest_table_lock
137662306a36Sopenharmony_ci */
137762306a36Sopenharmony_cistatic void __gmap_unshadow_sgt(struct gmap *sg, unsigned long raddr,
137862306a36Sopenharmony_ci				unsigned long *sgt)
137962306a36Sopenharmony_ci{
138062306a36Sopenharmony_ci	struct page *page;
138162306a36Sopenharmony_ci	phys_addr_t pgt;
138262306a36Sopenharmony_ci	int i;
138362306a36Sopenharmony_ci
138462306a36Sopenharmony_ci	BUG_ON(!gmap_is_shadow(sg));
138562306a36Sopenharmony_ci	for (i = 0; i < _CRST_ENTRIES; i++, raddr += _SEGMENT_SIZE) {
138662306a36Sopenharmony_ci		if (!(sgt[i] & _SEGMENT_ENTRY_ORIGIN))
138762306a36Sopenharmony_ci			continue;
138862306a36Sopenharmony_ci		pgt = sgt[i] & _REGION_ENTRY_ORIGIN;
138962306a36Sopenharmony_ci		sgt[i] = _SEGMENT_ENTRY_EMPTY;
139062306a36Sopenharmony_ci		__gmap_unshadow_pgt(sg, raddr, __va(pgt));
139162306a36Sopenharmony_ci		/* Free page table */
139262306a36Sopenharmony_ci		page = phys_to_page(pgt);
139362306a36Sopenharmony_ci		list_del(&page->lru);
139462306a36Sopenharmony_ci		page_table_free_pgste(page);
139562306a36Sopenharmony_ci	}
139662306a36Sopenharmony_ci}
139762306a36Sopenharmony_ci
139862306a36Sopenharmony_ci/**
139962306a36Sopenharmony_ci * gmap_unshadow_sgt - remove a shadow segment table from a region-3 entry
140062306a36Sopenharmony_ci * @sg: pointer to the shadow guest address space structure
140162306a36Sopenharmony_ci * @raddr: rmap address in the shadow guest address space
140262306a36Sopenharmony_ci *
140362306a36Sopenharmony_ci * Called with the shadow->guest_table_lock
140462306a36Sopenharmony_ci */
140562306a36Sopenharmony_cistatic void gmap_unshadow_sgt(struct gmap *sg, unsigned long raddr)
140662306a36Sopenharmony_ci{
140762306a36Sopenharmony_ci	unsigned long r3o, *r3e;
140862306a36Sopenharmony_ci	phys_addr_t sgt;
140962306a36Sopenharmony_ci	struct page *page;
141062306a36Sopenharmony_ci
141162306a36Sopenharmony_ci	BUG_ON(!gmap_is_shadow(sg));
141262306a36Sopenharmony_ci	r3e = gmap_table_walk(sg, raddr, 2); /* get region-3 pointer */
141362306a36Sopenharmony_ci	if (!r3e || !(*r3e & _REGION_ENTRY_ORIGIN))
141462306a36Sopenharmony_ci		return;
141562306a36Sopenharmony_ci	gmap_call_notifier(sg, raddr, raddr + _REGION3_SIZE - 1);
141662306a36Sopenharmony_ci	r3o = (unsigned long) (r3e - ((raddr & _REGION3_INDEX) >> _REGION3_SHIFT));
141762306a36Sopenharmony_ci	gmap_idte_one(__pa(r3o) | _ASCE_TYPE_REGION3, raddr);
141862306a36Sopenharmony_ci	sgt = *r3e & _REGION_ENTRY_ORIGIN;
141962306a36Sopenharmony_ci	*r3e = _REGION3_ENTRY_EMPTY;
142062306a36Sopenharmony_ci	__gmap_unshadow_sgt(sg, raddr, __va(sgt));
142162306a36Sopenharmony_ci	/* Free segment table */
142262306a36Sopenharmony_ci	page = phys_to_page(sgt);
142362306a36Sopenharmony_ci	list_del(&page->lru);
142462306a36Sopenharmony_ci	__free_pages(page, CRST_ALLOC_ORDER);
142562306a36Sopenharmony_ci}
142662306a36Sopenharmony_ci
142762306a36Sopenharmony_ci/**
142862306a36Sopenharmony_ci * __gmap_unshadow_r3t - remove all entries from a shadow region-3 table
142962306a36Sopenharmony_ci * @sg: pointer to the shadow guest address space structure
143062306a36Sopenharmony_ci * @raddr: address in the shadow guest address space
143162306a36Sopenharmony_ci * @r3t: pointer to the start of a shadow region-3 table
143262306a36Sopenharmony_ci *
143362306a36Sopenharmony_ci * Called with the sg->guest_table_lock
143462306a36Sopenharmony_ci */
143562306a36Sopenharmony_cistatic void __gmap_unshadow_r3t(struct gmap *sg, unsigned long raddr,
143662306a36Sopenharmony_ci				unsigned long *r3t)
143762306a36Sopenharmony_ci{
143862306a36Sopenharmony_ci	struct page *page;
143962306a36Sopenharmony_ci	phys_addr_t sgt;
144062306a36Sopenharmony_ci	int i;
144162306a36Sopenharmony_ci
144262306a36Sopenharmony_ci	BUG_ON(!gmap_is_shadow(sg));
144362306a36Sopenharmony_ci	for (i = 0; i < _CRST_ENTRIES; i++, raddr += _REGION3_SIZE) {
144462306a36Sopenharmony_ci		if (!(r3t[i] & _REGION_ENTRY_ORIGIN))
144562306a36Sopenharmony_ci			continue;
144662306a36Sopenharmony_ci		sgt = r3t[i] & _REGION_ENTRY_ORIGIN;
144762306a36Sopenharmony_ci		r3t[i] = _REGION3_ENTRY_EMPTY;
144862306a36Sopenharmony_ci		__gmap_unshadow_sgt(sg, raddr, __va(sgt));
144962306a36Sopenharmony_ci		/* Free segment table */
145062306a36Sopenharmony_ci		page = phys_to_page(sgt);
145162306a36Sopenharmony_ci		list_del(&page->lru);
145262306a36Sopenharmony_ci		__free_pages(page, CRST_ALLOC_ORDER);
145362306a36Sopenharmony_ci	}
145462306a36Sopenharmony_ci}
145562306a36Sopenharmony_ci
145662306a36Sopenharmony_ci/**
145762306a36Sopenharmony_ci * gmap_unshadow_r3t - remove a shadow region-3 table from a region-2 entry
145862306a36Sopenharmony_ci * @sg: pointer to the shadow guest address space structure
145962306a36Sopenharmony_ci * @raddr: rmap address in the shadow guest address space
146062306a36Sopenharmony_ci *
146162306a36Sopenharmony_ci * Called with the sg->guest_table_lock
146262306a36Sopenharmony_ci */
146362306a36Sopenharmony_cistatic void gmap_unshadow_r3t(struct gmap *sg, unsigned long raddr)
146462306a36Sopenharmony_ci{
146562306a36Sopenharmony_ci	unsigned long r2o, *r2e;
146662306a36Sopenharmony_ci	phys_addr_t r3t;
146762306a36Sopenharmony_ci	struct page *page;
146862306a36Sopenharmony_ci
146962306a36Sopenharmony_ci	BUG_ON(!gmap_is_shadow(sg));
147062306a36Sopenharmony_ci	r2e = gmap_table_walk(sg, raddr, 3); /* get region-2 pointer */
147162306a36Sopenharmony_ci	if (!r2e || !(*r2e & _REGION_ENTRY_ORIGIN))
147262306a36Sopenharmony_ci		return;
147362306a36Sopenharmony_ci	gmap_call_notifier(sg, raddr, raddr + _REGION2_SIZE - 1);
147462306a36Sopenharmony_ci	r2o = (unsigned long) (r2e - ((raddr & _REGION2_INDEX) >> _REGION2_SHIFT));
147562306a36Sopenharmony_ci	gmap_idte_one(__pa(r2o) | _ASCE_TYPE_REGION2, raddr);
147662306a36Sopenharmony_ci	r3t = *r2e & _REGION_ENTRY_ORIGIN;
147762306a36Sopenharmony_ci	*r2e = _REGION2_ENTRY_EMPTY;
147862306a36Sopenharmony_ci	__gmap_unshadow_r3t(sg, raddr, __va(r3t));
147962306a36Sopenharmony_ci	/* Free region 3 table */
148062306a36Sopenharmony_ci	page = phys_to_page(r3t);
148162306a36Sopenharmony_ci	list_del(&page->lru);
148262306a36Sopenharmony_ci	__free_pages(page, CRST_ALLOC_ORDER);
148362306a36Sopenharmony_ci}
148462306a36Sopenharmony_ci
148562306a36Sopenharmony_ci/**
148662306a36Sopenharmony_ci * __gmap_unshadow_r2t - remove all entries from a shadow region-2 table
148762306a36Sopenharmony_ci * @sg: pointer to the shadow guest address space structure
148862306a36Sopenharmony_ci * @raddr: rmap address in the shadow guest address space
148962306a36Sopenharmony_ci * @r2t: pointer to the start of a shadow region-2 table
149062306a36Sopenharmony_ci *
149162306a36Sopenharmony_ci * Called with the sg->guest_table_lock
149262306a36Sopenharmony_ci */
149362306a36Sopenharmony_cistatic void __gmap_unshadow_r2t(struct gmap *sg, unsigned long raddr,
149462306a36Sopenharmony_ci				unsigned long *r2t)
149562306a36Sopenharmony_ci{
149662306a36Sopenharmony_ci	phys_addr_t r3t;
149762306a36Sopenharmony_ci	struct page *page;
149862306a36Sopenharmony_ci	int i;
149962306a36Sopenharmony_ci
150062306a36Sopenharmony_ci	BUG_ON(!gmap_is_shadow(sg));
150162306a36Sopenharmony_ci	for (i = 0; i < _CRST_ENTRIES; i++, raddr += _REGION2_SIZE) {
150262306a36Sopenharmony_ci		if (!(r2t[i] & _REGION_ENTRY_ORIGIN))
150362306a36Sopenharmony_ci			continue;
150462306a36Sopenharmony_ci		r3t = r2t[i] & _REGION_ENTRY_ORIGIN;
150562306a36Sopenharmony_ci		r2t[i] = _REGION2_ENTRY_EMPTY;
150662306a36Sopenharmony_ci		__gmap_unshadow_r3t(sg, raddr, __va(r3t));
150762306a36Sopenharmony_ci		/* Free region 3 table */
150862306a36Sopenharmony_ci		page = phys_to_page(r3t);
150962306a36Sopenharmony_ci		list_del(&page->lru);
151062306a36Sopenharmony_ci		__free_pages(page, CRST_ALLOC_ORDER);
151162306a36Sopenharmony_ci	}
151262306a36Sopenharmony_ci}
151362306a36Sopenharmony_ci
151462306a36Sopenharmony_ci/**
151562306a36Sopenharmony_ci * gmap_unshadow_r2t - remove a shadow region-2 table from a region-1 entry
151662306a36Sopenharmony_ci * @sg: pointer to the shadow guest address space structure
151762306a36Sopenharmony_ci * @raddr: rmap address in the shadow guest address space
151862306a36Sopenharmony_ci *
151962306a36Sopenharmony_ci * Called with the sg->guest_table_lock
152062306a36Sopenharmony_ci */
152162306a36Sopenharmony_cistatic void gmap_unshadow_r2t(struct gmap *sg, unsigned long raddr)
152262306a36Sopenharmony_ci{
152362306a36Sopenharmony_ci	unsigned long r1o, *r1e;
152462306a36Sopenharmony_ci	struct page *page;
152562306a36Sopenharmony_ci	phys_addr_t r2t;
152662306a36Sopenharmony_ci
152762306a36Sopenharmony_ci	BUG_ON(!gmap_is_shadow(sg));
152862306a36Sopenharmony_ci	r1e = gmap_table_walk(sg, raddr, 4); /* get region-1 pointer */
152962306a36Sopenharmony_ci	if (!r1e || !(*r1e & _REGION_ENTRY_ORIGIN))
153062306a36Sopenharmony_ci		return;
153162306a36Sopenharmony_ci	gmap_call_notifier(sg, raddr, raddr + _REGION1_SIZE - 1);
153262306a36Sopenharmony_ci	r1o = (unsigned long) (r1e - ((raddr & _REGION1_INDEX) >> _REGION1_SHIFT));
153362306a36Sopenharmony_ci	gmap_idte_one(__pa(r1o) | _ASCE_TYPE_REGION1, raddr);
153462306a36Sopenharmony_ci	r2t = *r1e & _REGION_ENTRY_ORIGIN;
153562306a36Sopenharmony_ci	*r1e = _REGION1_ENTRY_EMPTY;
153662306a36Sopenharmony_ci	__gmap_unshadow_r2t(sg, raddr, __va(r2t));
153762306a36Sopenharmony_ci	/* Free region 2 table */
153862306a36Sopenharmony_ci	page = phys_to_page(r2t);
153962306a36Sopenharmony_ci	list_del(&page->lru);
154062306a36Sopenharmony_ci	__free_pages(page, CRST_ALLOC_ORDER);
154162306a36Sopenharmony_ci}
154262306a36Sopenharmony_ci
154362306a36Sopenharmony_ci/**
154462306a36Sopenharmony_ci * __gmap_unshadow_r1t - remove all entries from a shadow region-1 table
154562306a36Sopenharmony_ci * @sg: pointer to the shadow guest address space structure
154662306a36Sopenharmony_ci * @raddr: rmap address in the shadow guest address space
154762306a36Sopenharmony_ci * @r1t: pointer to the start of a shadow region-1 table
154862306a36Sopenharmony_ci *
154962306a36Sopenharmony_ci * Called with the shadow->guest_table_lock
155062306a36Sopenharmony_ci */
155162306a36Sopenharmony_cistatic void __gmap_unshadow_r1t(struct gmap *sg, unsigned long raddr,
155262306a36Sopenharmony_ci				unsigned long *r1t)
155362306a36Sopenharmony_ci{
155462306a36Sopenharmony_ci	unsigned long asce;
155562306a36Sopenharmony_ci	struct page *page;
155662306a36Sopenharmony_ci	phys_addr_t r2t;
155762306a36Sopenharmony_ci	int i;
155862306a36Sopenharmony_ci
155962306a36Sopenharmony_ci	BUG_ON(!gmap_is_shadow(sg));
156062306a36Sopenharmony_ci	asce = __pa(r1t) | _ASCE_TYPE_REGION1;
156162306a36Sopenharmony_ci	for (i = 0; i < _CRST_ENTRIES; i++, raddr += _REGION1_SIZE) {
156262306a36Sopenharmony_ci		if (!(r1t[i] & _REGION_ENTRY_ORIGIN))
156362306a36Sopenharmony_ci			continue;
156462306a36Sopenharmony_ci		r2t = r1t[i] & _REGION_ENTRY_ORIGIN;
156562306a36Sopenharmony_ci		__gmap_unshadow_r2t(sg, raddr, __va(r2t));
156662306a36Sopenharmony_ci		/* Clear entry and flush translation r1t -> r2t */
156762306a36Sopenharmony_ci		gmap_idte_one(asce, raddr);
156862306a36Sopenharmony_ci		r1t[i] = _REGION1_ENTRY_EMPTY;
156962306a36Sopenharmony_ci		/* Free region 2 table */
157062306a36Sopenharmony_ci		page = phys_to_page(r2t);
157162306a36Sopenharmony_ci		list_del(&page->lru);
157262306a36Sopenharmony_ci		__free_pages(page, CRST_ALLOC_ORDER);
157362306a36Sopenharmony_ci	}
157462306a36Sopenharmony_ci}
157562306a36Sopenharmony_ci
157662306a36Sopenharmony_ci/**
157762306a36Sopenharmony_ci * gmap_unshadow - remove a shadow page table completely
157862306a36Sopenharmony_ci * @sg: pointer to the shadow guest address space structure
157962306a36Sopenharmony_ci *
158062306a36Sopenharmony_ci * Called with sg->guest_table_lock
158162306a36Sopenharmony_ci */
158262306a36Sopenharmony_cistatic void gmap_unshadow(struct gmap *sg)
158362306a36Sopenharmony_ci{
158462306a36Sopenharmony_ci	unsigned long *table;
158562306a36Sopenharmony_ci
158662306a36Sopenharmony_ci	BUG_ON(!gmap_is_shadow(sg));
158762306a36Sopenharmony_ci	if (sg->removed)
158862306a36Sopenharmony_ci		return;
158962306a36Sopenharmony_ci	sg->removed = 1;
159062306a36Sopenharmony_ci	gmap_call_notifier(sg, 0, -1UL);
159162306a36Sopenharmony_ci	gmap_flush_tlb(sg);
159262306a36Sopenharmony_ci	table = __va(sg->asce & _ASCE_ORIGIN);
159362306a36Sopenharmony_ci	switch (sg->asce & _ASCE_TYPE_MASK) {
159462306a36Sopenharmony_ci	case _ASCE_TYPE_REGION1:
159562306a36Sopenharmony_ci		__gmap_unshadow_r1t(sg, 0, table);
159662306a36Sopenharmony_ci		break;
159762306a36Sopenharmony_ci	case _ASCE_TYPE_REGION2:
159862306a36Sopenharmony_ci		__gmap_unshadow_r2t(sg, 0, table);
159962306a36Sopenharmony_ci		break;
160062306a36Sopenharmony_ci	case _ASCE_TYPE_REGION3:
160162306a36Sopenharmony_ci		__gmap_unshadow_r3t(sg, 0, table);
160262306a36Sopenharmony_ci		break;
160362306a36Sopenharmony_ci	case _ASCE_TYPE_SEGMENT:
160462306a36Sopenharmony_ci		__gmap_unshadow_sgt(sg, 0, table);
160562306a36Sopenharmony_ci		break;
160662306a36Sopenharmony_ci	}
160762306a36Sopenharmony_ci}
160862306a36Sopenharmony_ci
160962306a36Sopenharmony_ci/**
161062306a36Sopenharmony_ci * gmap_find_shadow - find a specific asce in the list of shadow tables
161162306a36Sopenharmony_ci * @parent: pointer to the parent gmap
161262306a36Sopenharmony_ci * @asce: ASCE for which the shadow table is created
161362306a36Sopenharmony_ci * @edat_level: edat level to be used for the shadow translation
161462306a36Sopenharmony_ci *
161562306a36Sopenharmony_ci * Returns the pointer to a gmap if a shadow table with the given asce is
161662306a36Sopenharmony_ci * already available, ERR_PTR(-EAGAIN) if another one is just being created,
161762306a36Sopenharmony_ci * otherwise NULL
161862306a36Sopenharmony_ci */
161962306a36Sopenharmony_cistatic struct gmap *gmap_find_shadow(struct gmap *parent, unsigned long asce,
162062306a36Sopenharmony_ci				     int edat_level)
162162306a36Sopenharmony_ci{
162262306a36Sopenharmony_ci	struct gmap *sg;
162362306a36Sopenharmony_ci
162462306a36Sopenharmony_ci	list_for_each_entry(sg, &parent->children, list) {
162562306a36Sopenharmony_ci		if (sg->orig_asce != asce || sg->edat_level != edat_level ||
162662306a36Sopenharmony_ci		    sg->removed)
162762306a36Sopenharmony_ci			continue;
162862306a36Sopenharmony_ci		if (!sg->initialized)
162962306a36Sopenharmony_ci			return ERR_PTR(-EAGAIN);
163062306a36Sopenharmony_ci		refcount_inc(&sg->ref_count);
163162306a36Sopenharmony_ci		return sg;
163262306a36Sopenharmony_ci	}
163362306a36Sopenharmony_ci	return NULL;
163462306a36Sopenharmony_ci}
163562306a36Sopenharmony_ci
163662306a36Sopenharmony_ci/**
163762306a36Sopenharmony_ci * gmap_shadow_valid - check if a shadow guest address space matches the
163862306a36Sopenharmony_ci *                     given properties and is still valid
163962306a36Sopenharmony_ci * @sg: pointer to the shadow guest address space structure
164062306a36Sopenharmony_ci * @asce: ASCE for which the shadow table is requested
164162306a36Sopenharmony_ci * @edat_level: edat level to be used for the shadow translation
164262306a36Sopenharmony_ci *
164362306a36Sopenharmony_ci * Returns 1 if the gmap shadow is still valid and matches the given
164462306a36Sopenharmony_ci * properties, the caller can continue using it. Returns 0 otherwise, the
164562306a36Sopenharmony_ci * caller has to request a new shadow gmap in this case.
164662306a36Sopenharmony_ci *
164762306a36Sopenharmony_ci */
164862306a36Sopenharmony_ciint gmap_shadow_valid(struct gmap *sg, unsigned long asce, int edat_level)
164962306a36Sopenharmony_ci{
165062306a36Sopenharmony_ci	if (sg->removed)
165162306a36Sopenharmony_ci		return 0;
165262306a36Sopenharmony_ci	return sg->orig_asce == asce && sg->edat_level == edat_level;
165362306a36Sopenharmony_ci}
165462306a36Sopenharmony_ciEXPORT_SYMBOL_GPL(gmap_shadow_valid);
165562306a36Sopenharmony_ci
165662306a36Sopenharmony_ci/**
165762306a36Sopenharmony_ci * gmap_shadow - create/find a shadow guest address space
165862306a36Sopenharmony_ci * @parent: pointer to the parent gmap
165962306a36Sopenharmony_ci * @asce: ASCE for which the shadow table is created
166062306a36Sopenharmony_ci * @edat_level: edat level to be used for the shadow translation
166162306a36Sopenharmony_ci *
166262306a36Sopenharmony_ci * The pages of the top level page table referred by the asce parameter
166362306a36Sopenharmony_ci * will be set to read-only and marked in the PGSTEs of the kvm process.
166462306a36Sopenharmony_ci * The shadow table will be removed automatically on any change to the
166562306a36Sopenharmony_ci * PTE mapping for the source table.
166662306a36Sopenharmony_ci *
166762306a36Sopenharmony_ci * Returns a guest address space structure, ERR_PTR(-ENOMEM) if out of memory,
166862306a36Sopenharmony_ci * ERR_PTR(-EAGAIN) if the caller has to retry and ERR_PTR(-EFAULT) if the
166962306a36Sopenharmony_ci * parent gmap table could not be protected.
167062306a36Sopenharmony_ci */
167162306a36Sopenharmony_cistruct gmap *gmap_shadow(struct gmap *parent, unsigned long asce,
167262306a36Sopenharmony_ci			 int edat_level)
167362306a36Sopenharmony_ci{
167462306a36Sopenharmony_ci	struct gmap *sg, *new;
167562306a36Sopenharmony_ci	unsigned long limit;
167662306a36Sopenharmony_ci	int rc;
167762306a36Sopenharmony_ci
167862306a36Sopenharmony_ci	BUG_ON(parent->mm->context.allow_gmap_hpage_1m);
167962306a36Sopenharmony_ci	BUG_ON(gmap_is_shadow(parent));
168062306a36Sopenharmony_ci	spin_lock(&parent->shadow_lock);
168162306a36Sopenharmony_ci	sg = gmap_find_shadow(parent, asce, edat_level);
168262306a36Sopenharmony_ci	spin_unlock(&parent->shadow_lock);
168362306a36Sopenharmony_ci	if (sg)
168462306a36Sopenharmony_ci		return sg;
168562306a36Sopenharmony_ci	/* Create a new shadow gmap */
168662306a36Sopenharmony_ci	limit = -1UL >> (33 - (((asce & _ASCE_TYPE_MASK) >> 2) * 11));
168762306a36Sopenharmony_ci	if (asce & _ASCE_REAL_SPACE)
168862306a36Sopenharmony_ci		limit = -1UL;
168962306a36Sopenharmony_ci	new = gmap_alloc(limit);
169062306a36Sopenharmony_ci	if (!new)
169162306a36Sopenharmony_ci		return ERR_PTR(-ENOMEM);
169262306a36Sopenharmony_ci	new->mm = parent->mm;
169362306a36Sopenharmony_ci	new->parent = gmap_get(parent);
169462306a36Sopenharmony_ci	new->private = parent->private;
169562306a36Sopenharmony_ci	new->orig_asce = asce;
169662306a36Sopenharmony_ci	new->edat_level = edat_level;
169762306a36Sopenharmony_ci	new->initialized = false;
169862306a36Sopenharmony_ci	spin_lock(&parent->shadow_lock);
169962306a36Sopenharmony_ci	/* Recheck if another CPU created the same shadow */
170062306a36Sopenharmony_ci	sg = gmap_find_shadow(parent, asce, edat_level);
170162306a36Sopenharmony_ci	if (sg) {
170262306a36Sopenharmony_ci		spin_unlock(&parent->shadow_lock);
170362306a36Sopenharmony_ci		gmap_free(new);
170462306a36Sopenharmony_ci		return sg;
170562306a36Sopenharmony_ci	}
170662306a36Sopenharmony_ci	if (asce & _ASCE_REAL_SPACE) {
170762306a36Sopenharmony_ci		/* only allow one real-space gmap shadow */
170862306a36Sopenharmony_ci		list_for_each_entry(sg, &parent->children, list) {
170962306a36Sopenharmony_ci			if (sg->orig_asce & _ASCE_REAL_SPACE) {
171062306a36Sopenharmony_ci				spin_lock(&sg->guest_table_lock);
171162306a36Sopenharmony_ci				gmap_unshadow(sg);
171262306a36Sopenharmony_ci				spin_unlock(&sg->guest_table_lock);
171362306a36Sopenharmony_ci				list_del(&sg->list);
171462306a36Sopenharmony_ci				gmap_put(sg);
171562306a36Sopenharmony_ci				break;
171662306a36Sopenharmony_ci			}
171762306a36Sopenharmony_ci		}
171862306a36Sopenharmony_ci	}
171962306a36Sopenharmony_ci	refcount_set(&new->ref_count, 2);
172062306a36Sopenharmony_ci	list_add(&new->list, &parent->children);
172162306a36Sopenharmony_ci	if (asce & _ASCE_REAL_SPACE) {
172262306a36Sopenharmony_ci		/* nothing to protect, return right away */
172362306a36Sopenharmony_ci		new->initialized = true;
172462306a36Sopenharmony_ci		spin_unlock(&parent->shadow_lock);
172562306a36Sopenharmony_ci		return new;
172662306a36Sopenharmony_ci	}
172762306a36Sopenharmony_ci	spin_unlock(&parent->shadow_lock);
172862306a36Sopenharmony_ci	/* protect after insertion, so it will get properly invalidated */
172962306a36Sopenharmony_ci	mmap_read_lock(parent->mm);
173062306a36Sopenharmony_ci	rc = gmap_protect_range(parent, asce & _ASCE_ORIGIN,
173162306a36Sopenharmony_ci				((asce & _ASCE_TABLE_LENGTH) + 1) * PAGE_SIZE,
173262306a36Sopenharmony_ci				PROT_READ, GMAP_NOTIFY_SHADOW);
173362306a36Sopenharmony_ci	mmap_read_unlock(parent->mm);
173462306a36Sopenharmony_ci	spin_lock(&parent->shadow_lock);
173562306a36Sopenharmony_ci	new->initialized = true;
173662306a36Sopenharmony_ci	if (rc) {
173762306a36Sopenharmony_ci		list_del(&new->list);
173862306a36Sopenharmony_ci		gmap_free(new);
173962306a36Sopenharmony_ci		new = ERR_PTR(rc);
174062306a36Sopenharmony_ci	}
174162306a36Sopenharmony_ci	spin_unlock(&parent->shadow_lock);
174262306a36Sopenharmony_ci	return new;
174362306a36Sopenharmony_ci}
174462306a36Sopenharmony_ciEXPORT_SYMBOL_GPL(gmap_shadow);
174562306a36Sopenharmony_ci
174662306a36Sopenharmony_ci/**
174762306a36Sopenharmony_ci * gmap_shadow_r2t - create an empty shadow region 2 table
174862306a36Sopenharmony_ci * @sg: pointer to the shadow guest address space structure
174962306a36Sopenharmony_ci * @saddr: faulting address in the shadow gmap
175062306a36Sopenharmony_ci * @r2t: parent gmap address of the region 2 table to get shadowed
175162306a36Sopenharmony_ci * @fake: r2t references contiguous guest memory block, not a r2t
175262306a36Sopenharmony_ci *
175362306a36Sopenharmony_ci * The r2t parameter specifies the address of the source table. The
175462306a36Sopenharmony_ci * four pages of the source table are made read-only in the parent gmap
175562306a36Sopenharmony_ci * address space. A write to the source table area @r2t will automatically
175662306a36Sopenharmony_ci * remove the shadow r2 table and all of its descendants.
175762306a36Sopenharmony_ci *
175862306a36Sopenharmony_ci * Returns 0 if successfully shadowed or already shadowed, -EAGAIN if the
175962306a36Sopenharmony_ci * shadow table structure is incomplete, -ENOMEM if out of memory and
176062306a36Sopenharmony_ci * -EFAULT if an address in the parent gmap could not be resolved.
176162306a36Sopenharmony_ci *
176262306a36Sopenharmony_ci * Called with sg->mm->mmap_lock in read.
176362306a36Sopenharmony_ci */
176462306a36Sopenharmony_ciint gmap_shadow_r2t(struct gmap *sg, unsigned long saddr, unsigned long r2t,
176562306a36Sopenharmony_ci		    int fake)
176662306a36Sopenharmony_ci{
176762306a36Sopenharmony_ci	unsigned long raddr, origin, offset, len;
176862306a36Sopenharmony_ci	unsigned long *table;
176962306a36Sopenharmony_ci	phys_addr_t s_r2t;
177062306a36Sopenharmony_ci	struct page *page;
177162306a36Sopenharmony_ci	int rc;
177262306a36Sopenharmony_ci
177362306a36Sopenharmony_ci	BUG_ON(!gmap_is_shadow(sg));
177462306a36Sopenharmony_ci	/* Allocate a shadow region second table */
177562306a36Sopenharmony_ci	page = gmap_alloc_crst();
177662306a36Sopenharmony_ci	if (!page)
177762306a36Sopenharmony_ci		return -ENOMEM;
177862306a36Sopenharmony_ci	page->index = r2t & _REGION_ENTRY_ORIGIN;
177962306a36Sopenharmony_ci	if (fake)
178062306a36Sopenharmony_ci		page->index |= GMAP_SHADOW_FAKE_TABLE;
178162306a36Sopenharmony_ci	s_r2t = page_to_phys(page);
178262306a36Sopenharmony_ci	/* Install shadow region second table */
178362306a36Sopenharmony_ci	spin_lock(&sg->guest_table_lock);
178462306a36Sopenharmony_ci	table = gmap_table_walk(sg, saddr, 4); /* get region-1 pointer */
178562306a36Sopenharmony_ci	if (!table) {
178662306a36Sopenharmony_ci		rc = -EAGAIN;		/* Race with unshadow */
178762306a36Sopenharmony_ci		goto out_free;
178862306a36Sopenharmony_ci	}
178962306a36Sopenharmony_ci	if (!(*table & _REGION_ENTRY_INVALID)) {
179062306a36Sopenharmony_ci		rc = 0;			/* Already established */
179162306a36Sopenharmony_ci		goto out_free;
179262306a36Sopenharmony_ci	} else if (*table & _REGION_ENTRY_ORIGIN) {
179362306a36Sopenharmony_ci		rc = -EAGAIN;		/* Race with shadow */
179462306a36Sopenharmony_ci		goto out_free;
179562306a36Sopenharmony_ci	}
179662306a36Sopenharmony_ci	crst_table_init(__va(s_r2t), _REGION2_ENTRY_EMPTY);
179762306a36Sopenharmony_ci	/* mark as invalid as long as the parent table is not protected */
179862306a36Sopenharmony_ci	*table = s_r2t | _REGION_ENTRY_LENGTH |
179962306a36Sopenharmony_ci		 _REGION_ENTRY_TYPE_R1 | _REGION_ENTRY_INVALID;
180062306a36Sopenharmony_ci	if (sg->edat_level >= 1)
180162306a36Sopenharmony_ci		*table |= (r2t & _REGION_ENTRY_PROTECT);
180262306a36Sopenharmony_ci	list_add(&page->lru, &sg->crst_list);
180362306a36Sopenharmony_ci	if (fake) {
180462306a36Sopenharmony_ci		/* nothing to protect for fake tables */
180562306a36Sopenharmony_ci		*table &= ~_REGION_ENTRY_INVALID;
180662306a36Sopenharmony_ci		spin_unlock(&sg->guest_table_lock);
180762306a36Sopenharmony_ci		return 0;
180862306a36Sopenharmony_ci	}
180962306a36Sopenharmony_ci	spin_unlock(&sg->guest_table_lock);
181062306a36Sopenharmony_ci	/* Make r2t read-only in parent gmap page table */
181162306a36Sopenharmony_ci	raddr = (saddr & _REGION1_MASK) | _SHADOW_RMAP_REGION1;
181262306a36Sopenharmony_ci	origin = r2t & _REGION_ENTRY_ORIGIN;
181362306a36Sopenharmony_ci	offset = ((r2t & _REGION_ENTRY_OFFSET) >> 6) * PAGE_SIZE;
181462306a36Sopenharmony_ci	len = ((r2t & _REGION_ENTRY_LENGTH) + 1) * PAGE_SIZE - offset;
181562306a36Sopenharmony_ci	rc = gmap_protect_rmap(sg, raddr, origin + offset, len);
181662306a36Sopenharmony_ci	spin_lock(&sg->guest_table_lock);
181762306a36Sopenharmony_ci	if (!rc) {
181862306a36Sopenharmony_ci		table = gmap_table_walk(sg, saddr, 4);
181962306a36Sopenharmony_ci		if (!table || (*table & _REGION_ENTRY_ORIGIN) != s_r2t)
182062306a36Sopenharmony_ci			rc = -EAGAIN;		/* Race with unshadow */
182162306a36Sopenharmony_ci		else
182262306a36Sopenharmony_ci			*table &= ~_REGION_ENTRY_INVALID;
182362306a36Sopenharmony_ci	} else {
182462306a36Sopenharmony_ci		gmap_unshadow_r2t(sg, raddr);
182562306a36Sopenharmony_ci	}
182662306a36Sopenharmony_ci	spin_unlock(&sg->guest_table_lock);
182762306a36Sopenharmony_ci	return rc;
182862306a36Sopenharmony_ciout_free:
182962306a36Sopenharmony_ci	spin_unlock(&sg->guest_table_lock);
183062306a36Sopenharmony_ci	__free_pages(page, CRST_ALLOC_ORDER);
183162306a36Sopenharmony_ci	return rc;
183262306a36Sopenharmony_ci}
183362306a36Sopenharmony_ciEXPORT_SYMBOL_GPL(gmap_shadow_r2t);
183462306a36Sopenharmony_ci
183562306a36Sopenharmony_ci/**
183662306a36Sopenharmony_ci * gmap_shadow_r3t - create a shadow region 3 table
183762306a36Sopenharmony_ci * @sg: pointer to the shadow guest address space structure
183862306a36Sopenharmony_ci * @saddr: faulting address in the shadow gmap
183962306a36Sopenharmony_ci * @r3t: parent gmap address of the region 3 table to get shadowed
184062306a36Sopenharmony_ci * @fake: r3t references contiguous guest memory block, not a r3t
184162306a36Sopenharmony_ci *
184262306a36Sopenharmony_ci * Returns 0 if successfully shadowed or already shadowed, -EAGAIN if the
184362306a36Sopenharmony_ci * shadow table structure is incomplete, -ENOMEM if out of memory and
184462306a36Sopenharmony_ci * -EFAULT if an address in the parent gmap could not be resolved.
184562306a36Sopenharmony_ci *
184662306a36Sopenharmony_ci * Called with sg->mm->mmap_lock in read.
184762306a36Sopenharmony_ci */
184862306a36Sopenharmony_ciint gmap_shadow_r3t(struct gmap *sg, unsigned long saddr, unsigned long r3t,
184962306a36Sopenharmony_ci		    int fake)
185062306a36Sopenharmony_ci{
185162306a36Sopenharmony_ci	unsigned long raddr, origin, offset, len;
185262306a36Sopenharmony_ci	unsigned long *table;
185362306a36Sopenharmony_ci	phys_addr_t s_r3t;
185462306a36Sopenharmony_ci	struct page *page;
185562306a36Sopenharmony_ci	int rc;
185662306a36Sopenharmony_ci
185762306a36Sopenharmony_ci	BUG_ON(!gmap_is_shadow(sg));
185862306a36Sopenharmony_ci	/* Allocate a shadow region second table */
185962306a36Sopenharmony_ci	page = gmap_alloc_crst();
186062306a36Sopenharmony_ci	if (!page)
186162306a36Sopenharmony_ci		return -ENOMEM;
186262306a36Sopenharmony_ci	page->index = r3t & _REGION_ENTRY_ORIGIN;
186362306a36Sopenharmony_ci	if (fake)
186462306a36Sopenharmony_ci		page->index |= GMAP_SHADOW_FAKE_TABLE;
186562306a36Sopenharmony_ci	s_r3t = page_to_phys(page);
186662306a36Sopenharmony_ci	/* Install shadow region second table */
186762306a36Sopenharmony_ci	spin_lock(&sg->guest_table_lock);
186862306a36Sopenharmony_ci	table = gmap_table_walk(sg, saddr, 3); /* get region-2 pointer */
186962306a36Sopenharmony_ci	if (!table) {
187062306a36Sopenharmony_ci		rc = -EAGAIN;		/* Race with unshadow */
187162306a36Sopenharmony_ci		goto out_free;
187262306a36Sopenharmony_ci	}
187362306a36Sopenharmony_ci	if (!(*table & _REGION_ENTRY_INVALID)) {
187462306a36Sopenharmony_ci		rc = 0;			/* Already established */
187562306a36Sopenharmony_ci		goto out_free;
187662306a36Sopenharmony_ci	} else if (*table & _REGION_ENTRY_ORIGIN) {
187762306a36Sopenharmony_ci		rc = -EAGAIN;		/* Race with shadow */
187862306a36Sopenharmony_ci		goto out_free;
187962306a36Sopenharmony_ci	}
188062306a36Sopenharmony_ci	crst_table_init(__va(s_r3t), _REGION3_ENTRY_EMPTY);
188162306a36Sopenharmony_ci	/* mark as invalid as long as the parent table is not protected */
188262306a36Sopenharmony_ci	*table = s_r3t | _REGION_ENTRY_LENGTH |
188362306a36Sopenharmony_ci		 _REGION_ENTRY_TYPE_R2 | _REGION_ENTRY_INVALID;
188462306a36Sopenharmony_ci	if (sg->edat_level >= 1)
188562306a36Sopenharmony_ci		*table |= (r3t & _REGION_ENTRY_PROTECT);
188662306a36Sopenharmony_ci	list_add(&page->lru, &sg->crst_list);
188762306a36Sopenharmony_ci	if (fake) {
188862306a36Sopenharmony_ci		/* nothing to protect for fake tables */
188962306a36Sopenharmony_ci		*table &= ~_REGION_ENTRY_INVALID;
189062306a36Sopenharmony_ci		spin_unlock(&sg->guest_table_lock);
189162306a36Sopenharmony_ci		return 0;
189262306a36Sopenharmony_ci	}
189362306a36Sopenharmony_ci	spin_unlock(&sg->guest_table_lock);
189462306a36Sopenharmony_ci	/* Make r3t read-only in parent gmap page table */
189562306a36Sopenharmony_ci	raddr = (saddr & _REGION2_MASK) | _SHADOW_RMAP_REGION2;
189662306a36Sopenharmony_ci	origin = r3t & _REGION_ENTRY_ORIGIN;
189762306a36Sopenharmony_ci	offset = ((r3t & _REGION_ENTRY_OFFSET) >> 6) * PAGE_SIZE;
189862306a36Sopenharmony_ci	len = ((r3t & _REGION_ENTRY_LENGTH) + 1) * PAGE_SIZE - offset;
189962306a36Sopenharmony_ci	rc = gmap_protect_rmap(sg, raddr, origin + offset, len);
190062306a36Sopenharmony_ci	spin_lock(&sg->guest_table_lock);
190162306a36Sopenharmony_ci	if (!rc) {
190262306a36Sopenharmony_ci		table = gmap_table_walk(sg, saddr, 3);
190362306a36Sopenharmony_ci		if (!table || (*table & _REGION_ENTRY_ORIGIN) != s_r3t)
190462306a36Sopenharmony_ci			rc = -EAGAIN;		/* Race with unshadow */
190562306a36Sopenharmony_ci		else
190662306a36Sopenharmony_ci			*table &= ~_REGION_ENTRY_INVALID;
190762306a36Sopenharmony_ci	} else {
190862306a36Sopenharmony_ci		gmap_unshadow_r3t(sg, raddr);
190962306a36Sopenharmony_ci	}
191062306a36Sopenharmony_ci	spin_unlock(&sg->guest_table_lock);
191162306a36Sopenharmony_ci	return rc;
191262306a36Sopenharmony_ciout_free:
191362306a36Sopenharmony_ci	spin_unlock(&sg->guest_table_lock);
191462306a36Sopenharmony_ci	__free_pages(page, CRST_ALLOC_ORDER);
191562306a36Sopenharmony_ci	return rc;
191662306a36Sopenharmony_ci}
191762306a36Sopenharmony_ciEXPORT_SYMBOL_GPL(gmap_shadow_r3t);
191862306a36Sopenharmony_ci
191962306a36Sopenharmony_ci/**
192062306a36Sopenharmony_ci * gmap_shadow_sgt - create a shadow segment table
192162306a36Sopenharmony_ci * @sg: pointer to the shadow guest address space structure
192262306a36Sopenharmony_ci * @saddr: faulting address in the shadow gmap
192362306a36Sopenharmony_ci * @sgt: parent gmap address of the segment table to get shadowed
192462306a36Sopenharmony_ci * @fake: sgt references contiguous guest memory block, not a sgt
192562306a36Sopenharmony_ci *
192662306a36Sopenharmony_ci * Returns: 0 if successfully shadowed or already shadowed, -EAGAIN if the
192762306a36Sopenharmony_ci * shadow table structure is incomplete, -ENOMEM if out of memory and
192862306a36Sopenharmony_ci * -EFAULT if an address in the parent gmap could not be resolved.
192962306a36Sopenharmony_ci *
193062306a36Sopenharmony_ci * Called with sg->mm->mmap_lock in read.
193162306a36Sopenharmony_ci */
193262306a36Sopenharmony_ciint gmap_shadow_sgt(struct gmap *sg, unsigned long saddr, unsigned long sgt,
193362306a36Sopenharmony_ci		    int fake)
193462306a36Sopenharmony_ci{
193562306a36Sopenharmony_ci	unsigned long raddr, origin, offset, len;
193662306a36Sopenharmony_ci	unsigned long *table;
193762306a36Sopenharmony_ci	phys_addr_t s_sgt;
193862306a36Sopenharmony_ci	struct page *page;
193962306a36Sopenharmony_ci	int rc;
194062306a36Sopenharmony_ci
194162306a36Sopenharmony_ci	BUG_ON(!gmap_is_shadow(sg) || (sgt & _REGION3_ENTRY_LARGE));
194262306a36Sopenharmony_ci	/* Allocate a shadow segment table */
194362306a36Sopenharmony_ci	page = gmap_alloc_crst();
194462306a36Sopenharmony_ci	if (!page)
194562306a36Sopenharmony_ci		return -ENOMEM;
194662306a36Sopenharmony_ci	page->index = sgt & _REGION_ENTRY_ORIGIN;
194762306a36Sopenharmony_ci	if (fake)
194862306a36Sopenharmony_ci		page->index |= GMAP_SHADOW_FAKE_TABLE;
194962306a36Sopenharmony_ci	s_sgt = page_to_phys(page);
195062306a36Sopenharmony_ci	/* Install shadow region second table */
195162306a36Sopenharmony_ci	spin_lock(&sg->guest_table_lock);
195262306a36Sopenharmony_ci	table = gmap_table_walk(sg, saddr, 2); /* get region-3 pointer */
195362306a36Sopenharmony_ci	if (!table) {
195462306a36Sopenharmony_ci		rc = -EAGAIN;		/* Race with unshadow */
195562306a36Sopenharmony_ci		goto out_free;
195662306a36Sopenharmony_ci	}
195762306a36Sopenharmony_ci	if (!(*table & _REGION_ENTRY_INVALID)) {
195862306a36Sopenharmony_ci		rc = 0;			/* Already established */
195962306a36Sopenharmony_ci		goto out_free;
196062306a36Sopenharmony_ci	} else if (*table & _REGION_ENTRY_ORIGIN) {
196162306a36Sopenharmony_ci		rc = -EAGAIN;		/* Race with shadow */
196262306a36Sopenharmony_ci		goto out_free;
196362306a36Sopenharmony_ci	}
196462306a36Sopenharmony_ci	crst_table_init(__va(s_sgt), _SEGMENT_ENTRY_EMPTY);
196562306a36Sopenharmony_ci	/* mark as invalid as long as the parent table is not protected */
196662306a36Sopenharmony_ci	*table = s_sgt | _REGION_ENTRY_LENGTH |
196762306a36Sopenharmony_ci		 _REGION_ENTRY_TYPE_R3 | _REGION_ENTRY_INVALID;
196862306a36Sopenharmony_ci	if (sg->edat_level >= 1)
196962306a36Sopenharmony_ci		*table |= sgt & _REGION_ENTRY_PROTECT;
197062306a36Sopenharmony_ci	list_add(&page->lru, &sg->crst_list);
197162306a36Sopenharmony_ci	if (fake) {
197262306a36Sopenharmony_ci		/* nothing to protect for fake tables */
197362306a36Sopenharmony_ci		*table &= ~_REGION_ENTRY_INVALID;
197462306a36Sopenharmony_ci		spin_unlock(&sg->guest_table_lock);
197562306a36Sopenharmony_ci		return 0;
197662306a36Sopenharmony_ci	}
197762306a36Sopenharmony_ci	spin_unlock(&sg->guest_table_lock);
197862306a36Sopenharmony_ci	/* Make sgt read-only in parent gmap page table */
197962306a36Sopenharmony_ci	raddr = (saddr & _REGION3_MASK) | _SHADOW_RMAP_REGION3;
198062306a36Sopenharmony_ci	origin = sgt & _REGION_ENTRY_ORIGIN;
198162306a36Sopenharmony_ci	offset = ((sgt & _REGION_ENTRY_OFFSET) >> 6) * PAGE_SIZE;
198262306a36Sopenharmony_ci	len = ((sgt & _REGION_ENTRY_LENGTH) + 1) * PAGE_SIZE - offset;
198362306a36Sopenharmony_ci	rc = gmap_protect_rmap(sg, raddr, origin + offset, len);
198462306a36Sopenharmony_ci	spin_lock(&sg->guest_table_lock);
198562306a36Sopenharmony_ci	if (!rc) {
198662306a36Sopenharmony_ci		table = gmap_table_walk(sg, saddr, 2);
198762306a36Sopenharmony_ci		if (!table || (*table & _REGION_ENTRY_ORIGIN) != s_sgt)
198862306a36Sopenharmony_ci			rc = -EAGAIN;		/* Race with unshadow */
198962306a36Sopenharmony_ci		else
199062306a36Sopenharmony_ci			*table &= ~_REGION_ENTRY_INVALID;
199162306a36Sopenharmony_ci	} else {
199262306a36Sopenharmony_ci		gmap_unshadow_sgt(sg, raddr);
199362306a36Sopenharmony_ci	}
199462306a36Sopenharmony_ci	spin_unlock(&sg->guest_table_lock);
199562306a36Sopenharmony_ci	return rc;
199662306a36Sopenharmony_ciout_free:
199762306a36Sopenharmony_ci	spin_unlock(&sg->guest_table_lock);
199862306a36Sopenharmony_ci	__free_pages(page, CRST_ALLOC_ORDER);
199962306a36Sopenharmony_ci	return rc;
200062306a36Sopenharmony_ci}
200162306a36Sopenharmony_ciEXPORT_SYMBOL_GPL(gmap_shadow_sgt);
200262306a36Sopenharmony_ci
200362306a36Sopenharmony_ci/**
200462306a36Sopenharmony_ci * gmap_shadow_pgt_lookup - find a shadow page table
200562306a36Sopenharmony_ci * @sg: pointer to the shadow guest address space structure
200662306a36Sopenharmony_ci * @saddr: the address in the shadow aguest address space
200762306a36Sopenharmony_ci * @pgt: parent gmap address of the page table to get shadowed
200862306a36Sopenharmony_ci * @dat_protection: if the pgtable is marked as protected by dat
200962306a36Sopenharmony_ci * @fake: pgt references contiguous guest memory block, not a pgtable
201062306a36Sopenharmony_ci *
201162306a36Sopenharmony_ci * Returns 0 if the shadow page table was found and -EAGAIN if the page
201262306a36Sopenharmony_ci * table was not found.
201362306a36Sopenharmony_ci *
201462306a36Sopenharmony_ci * Called with sg->mm->mmap_lock in read.
201562306a36Sopenharmony_ci */
201662306a36Sopenharmony_ciint gmap_shadow_pgt_lookup(struct gmap *sg, unsigned long saddr,
201762306a36Sopenharmony_ci			   unsigned long *pgt, int *dat_protection,
201862306a36Sopenharmony_ci			   int *fake)
201962306a36Sopenharmony_ci{
202062306a36Sopenharmony_ci	unsigned long *table;
202162306a36Sopenharmony_ci	struct page *page;
202262306a36Sopenharmony_ci	int rc;
202362306a36Sopenharmony_ci
202462306a36Sopenharmony_ci	BUG_ON(!gmap_is_shadow(sg));
202562306a36Sopenharmony_ci	spin_lock(&sg->guest_table_lock);
202662306a36Sopenharmony_ci	table = gmap_table_walk(sg, saddr, 1); /* get segment pointer */
202762306a36Sopenharmony_ci	if (table && !(*table & _SEGMENT_ENTRY_INVALID)) {
202862306a36Sopenharmony_ci		/* Shadow page tables are full pages (pte+pgste) */
202962306a36Sopenharmony_ci		page = pfn_to_page(*table >> PAGE_SHIFT);
203062306a36Sopenharmony_ci		*pgt = page->index & ~GMAP_SHADOW_FAKE_TABLE;
203162306a36Sopenharmony_ci		*dat_protection = !!(*table & _SEGMENT_ENTRY_PROTECT);
203262306a36Sopenharmony_ci		*fake = !!(page->index & GMAP_SHADOW_FAKE_TABLE);
203362306a36Sopenharmony_ci		rc = 0;
203462306a36Sopenharmony_ci	} else  {
203562306a36Sopenharmony_ci		rc = -EAGAIN;
203662306a36Sopenharmony_ci	}
203762306a36Sopenharmony_ci	spin_unlock(&sg->guest_table_lock);
203862306a36Sopenharmony_ci	return rc;
203962306a36Sopenharmony_ci
204062306a36Sopenharmony_ci}
204162306a36Sopenharmony_ciEXPORT_SYMBOL_GPL(gmap_shadow_pgt_lookup);
204262306a36Sopenharmony_ci
204362306a36Sopenharmony_ci/**
204462306a36Sopenharmony_ci * gmap_shadow_pgt - instantiate a shadow page table
204562306a36Sopenharmony_ci * @sg: pointer to the shadow guest address space structure
204662306a36Sopenharmony_ci * @saddr: faulting address in the shadow gmap
204762306a36Sopenharmony_ci * @pgt: parent gmap address of the page table to get shadowed
204862306a36Sopenharmony_ci * @fake: pgt references contiguous guest memory block, not a pgtable
204962306a36Sopenharmony_ci *
205062306a36Sopenharmony_ci * Returns 0 if successfully shadowed or already shadowed, -EAGAIN if the
205162306a36Sopenharmony_ci * shadow table structure is incomplete, -ENOMEM if out of memory,
205262306a36Sopenharmony_ci * -EFAULT if an address in the parent gmap could not be resolved and
205362306a36Sopenharmony_ci *
205462306a36Sopenharmony_ci * Called with gmap->mm->mmap_lock in read
205562306a36Sopenharmony_ci */
205662306a36Sopenharmony_ciint gmap_shadow_pgt(struct gmap *sg, unsigned long saddr, unsigned long pgt,
205762306a36Sopenharmony_ci		    int fake)
205862306a36Sopenharmony_ci{
205962306a36Sopenharmony_ci	unsigned long raddr, origin;
206062306a36Sopenharmony_ci	unsigned long *table;
206162306a36Sopenharmony_ci	struct page *page;
206262306a36Sopenharmony_ci	phys_addr_t s_pgt;
206362306a36Sopenharmony_ci	int rc;
206462306a36Sopenharmony_ci
206562306a36Sopenharmony_ci	BUG_ON(!gmap_is_shadow(sg) || (pgt & _SEGMENT_ENTRY_LARGE));
206662306a36Sopenharmony_ci	/* Allocate a shadow page table */
206762306a36Sopenharmony_ci	page = page_table_alloc_pgste(sg->mm);
206862306a36Sopenharmony_ci	if (!page)
206962306a36Sopenharmony_ci		return -ENOMEM;
207062306a36Sopenharmony_ci	page->index = pgt & _SEGMENT_ENTRY_ORIGIN;
207162306a36Sopenharmony_ci	if (fake)
207262306a36Sopenharmony_ci		page->index |= GMAP_SHADOW_FAKE_TABLE;
207362306a36Sopenharmony_ci	s_pgt = page_to_phys(page);
207462306a36Sopenharmony_ci	/* Install shadow page table */
207562306a36Sopenharmony_ci	spin_lock(&sg->guest_table_lock);
207662306a36Sopenharmony_ci	table = gmap_table_walk(sg, saddr, 1); /* get segment pointer */
207762306a36Sopenharmony_ci	if (!table) {
207862306a36Sopenharmony_ci		rc = -EAGAIN;		/* Race with unshadow */
207962306a36Sopenharmony_ci		goto out_free;
208062306a36Sopenharmony_ci	}
208162306a36Sopenharmony_ci	if (!(*table & _SEGMENT_ENTRY_INVALID)) {
208262306a36Sopenharmony_ci		rc = 0;			/* Already established */
208362306a36Sopenharmony_ci		goto out_free;
208462306a36Sopenharmony_ci	} else if (*table & _SEGMENT_ENTRY_ORIGIN) {
208562306a36Sopenharmony_ci		rc = -EAGAIN;		/* Race with shadow */
208662306a36Sopenharmony_ci		goto out_free;
208762306a36Sopenharmony_ci	}
208862306a36Sopenharmony_ci	/* mark as invalid as long as the parent table is not protected */
208962306a36Sopenharmony_ci	*table = (unsigned long) s_pgt | _SEGMENT_ENTRY |
209062306a36Sopenharmony_ci		 (pgt & _SEGMENT_ENTRY_PROTECT) | _SEGMENT_ENTRY_INVALID;
209162306a36Sopenharmony_ci	list_add(&page->lru, &sg->pt_list);
209262306a36Sopenharmony_ci	if (fake) {
209362306a36Sopenharmony_ci		/* nothing to protect for fake tables */
209462306a36Sopenharmony_ci		*table &= ~_SEGMENT_ENTRY_INVALID;
209562306a36Sopenharmony_ci		spin_unlock(&sg->guest_table_lock);
209662306a36Sopenharmony_ci		return 0;
209762306a36Sopenharmony_ci	}
209862306a36Sopenharmony_ci	spin_unlock(&sg->guest_table_lock);
209962306a36Sopenharmony_ci	/* Make pgt read-only in parent gmap page table (not the pgste) */
210062306a36Sopenharmony_ci	raddr = (saddr & _SEGMENT_MASK) | _SHADOW_RMAP_SEGMENT;
210162306a36Sopenharmony_ci	origin = pgt & _SEGMENT_ENTRY_ORIGIN & PAGE_MASK;
210262306a36Sopenharmony_ci	rc = gmap_protect_rmap(sg, raddr, origin, PAGE_SIZE);
210362306a36Sopenharmony_ci	spin_lock(&sg->guest_table_lock);
210462306a36Sopenharmony_ci	if (!rc) {
210562306a36Sopenharmony_ci		table = gmap_table_walk(sg, saddr, 1);
210662306a36Sopenharmony_ci		if (!table || (*table & _SEGMENT_ENTRY_ORIGIN) != s_pgt)
210762306a36Sopenharmony_ci			rc = -EAGAIN;		/* Race with unshadow */
210862306a36Sopenharmony_ci		else
210962306a36Sopenharmony_ci			*table &= ~_SEGMENT_ENTRY_INVALID;
211062306a36Sopenharmony_ci	} else {
211162306a36Sopenharmony_ci		gmap_unshadow_pgt(sg, raddr);
211262306a36Sopenharmony_ci	}
211362306a36Sopenharmony_ci	spin_unlock(&sg->guest_table_lock);
211462306a36Sopenharmony_ci	return rc;
211562306a36Sopenharmony_ciout_free:
211662306a36Sopenharmony_ci	spin_unlock(&sg->guest_table_lock);
211762306a36Sopenharmony_ci	page_table_free_pgste(page);
211862306a36Sopenharmony_ci	return rc;
211962306a36Sopenharmony_ci
212062306a36Sopenharmony_ci}
212162306a36Sopenharmony_ciEXPORT_SYMBOL_GPL(gmap_shadow_pgt);
212262306a36Sopenharmony_ci
212362306a36Sopenharmony_ci/**
212462306a36Sopenharmony_ci * gmap_shadow_page - create a shadow page mapping
212562306a36Sopenharmony_ci * @sg: pointer to the shadow guest address space structure
212662306a36Sopenharmony_ci * @saddr: faulting address in the shadow gmap
212762306a36Sopenharmony_ci * @pte: pte in parent gmap address space to get shadowed
212862306a36Sopenharmony_ci *
212962306a36Sopenharmony_ci * Returns 0 if successfully shadowed or already shadowed, -EAGAIN if the
213062306a36Sopenharmony_ci * shadow table structure is incomplete, -ENOMEM if out of memory and
213162306a36Sopenharmony_ci * -EFAULT if an address in the parent gmap could not be resolved.
213262306a36Sopenharmony_ci *
213362306a36Sopenharmony_ci * Called with sg->mm->mmap_lock in read.
213462306a36Sopenharmony_ci */
213562306a36Sopenharmony_ciint gmap_shadow_page(struct gmap *sg, unsigned long saddr, pte_t pte)
213662306a36Sopenharmony_ci{
213762306a36Sopenharmony_ci	struct gmap *parent;
213862306a36Sopenharmony_ci	struct gmap_rmap *rmap;
213962306a36Sopenharmony_ci	unsigned long vmaddr, paddr;
214062306a36Sopenharmony_ci	spinlock_t *ptl;
214162306a36Sopenharmony_ci	pte_t *sptep, *tptep;
214262306a36Sopenharmony_ci	int prot;
214362306a36Sopenharmony_ci	int rc;
214462306a36Sopenharmony_ci
214562306a36Sopenharmony_ci	BUG_ON(!gmap_is_shadow(sg));
214662306a36Sopenharmony_ci	parent = sg->parent;
214762306a36Sopenharmony_ci	prot = (pte_val(pte) & _PAGE_PROTECT) ? PROT_READ : PROT_WRITE;
214862306a36Sopenharmony_ci
214962306a36Sopenharmony_ci	rmap = kzalloc(sizeof(*rmap), GFP_KERNEL_ACCOUNT);
215062306a36Sopenharmony_ci	if (!rmap)
215162306a36Sopenharmony_ci		return -ENOMEM;
215262306a36Sopenharmony_ci	rmap->raddr = (saddr & PAGE_MASK) | _SHADOW_RMAP_PGTABLE;
215362306a36Sopenharmony_ci
215462306a36Sopenharmony_ci	while (1) {
215562306a36Sopenharmony_ci		paddr = pte_val(pte) & PAGE_MASK;
215662306a36Sopenharmony_ci		vmaddr = __gmap_translate(parent, paddr);
215762306a36Sopenharmony_ci		if (IS_ERR_VALUE(vmaddr)) {
215862306a36Sopenharmony_ci			rc = vmaddr;
215962306a36Sopenharmony_ci			break;
216062306a36Sopenharmony_ci		}
216162306a36Sopenharmony_ci		rc = radix_tree_preload(GFP_KERNEL_ACCOUNT);
216262306a36Sopenharmony_ci		if (rc)
216362306a36Sopenharmony_ci			break;
216462306a36Sopenharmony_ci		rc = -EAGAIN;
216562306a36Sopenharmony_ci		sptep = gmap_pte_op_walk(parent, paddr, &ptl);
216662306a36Sopenharmony_ci		if (sptep) {
216762306a36Sopenharmony_ci			spin_lock(&sg->guest_table_lock);
216862306a36Sopenharmony_ci			/* Get page table pointer */
216962306a36Sopenharmony_ci			tptep = (pte_t *) gmap_table_walk(sg, saddr, 0);
217062306a36Sopenharmony_ci			if (!tptep) {
217162306a36Sopenharmony_ci				spin_unlock(&sg->guest_table_lock);
217262306a36Sopenharmony_ci				gmap_pte_op_end(sptep, ptl);
217362306a36Sopenharmony_ci				radix_tree_preload_end();
217462306a36Sopenharmony_ci				break;
217562306a36Sopenharmony_ci			}
217662306a36Sopenharmony_ci			rc = ptep_shadow_pte(sg->mm, saddr, sptep, tptep, pte);
217762306a36Sopenharmony_ci			if (rc > 0) {
217862306a36Sopenharmony_ci				/* Success and a new mapping */
217962306a36Sopenharmony_ci				gmap_insert_rmap(sg, vmaddr, rmap);
218062306a36Sopenharmony_ci				rmap = NULL;
218162306a36Sopenharmony_ci				rc = 0;
218262306a36Sopenharmony_ci			}
218362306a36Sopenharmony_ci			gmap_pte_op_end(sptep, ptl);
218462306a36Sopenharmony_ci			spin_unlock(&sg->guest_table_lock);
218562306a36Sopenharmony_ci		}
218662306a36Sopenharmony_ci		radix_tree_preload_end();
218762306a36Sopenharmony_ci		if (!rc)
218862306a36Sopenharmony_ci			break;
218962306a36Sopenharmony_ci		rc = gmap_pte_op_fixup(parent, paddr, vmaddr, prot);
219062306a36Sopenharmony_ci		if (rc)
219162306a36Sopenharmony_ci			break;
219262306a36Sopenharmony_ci	}
219362306a36Sopenharmony_ci	kfree(rmap);
219462306a36Sopenharmony_ci	return rc;
219562306a36Sopenharmony_ci}
219662306a36Sopenharmony_ciEXPORT_SYMBOL_GPL(gmap_shadow_page);
219762306a36Sopenharmony_ci
219862306a36Sopenharmony_ci/*
219962306a36Sopenharmony_ci * gmap_shadow_notify - handle notifications for shadow gmap
220062306a36Sopenharmony_ci *
220162306a36Sopenharmony_ci * Called with sg->parent->shadow_lock.
220262306a36Sopenharmony_ci */
220362306a36Sopenharmony_cistatic void gmap_shadow_notify(struct gmap *sg, unsigned long vmaddr,
220462306a36Sopenharmony_ci			       unsigned long gaddr)
220562306a36Sopenharmony_ci{
220662306a36Sopenharmony_ci	struct gmap_rmap *rmap, *rnext, *head;
220762306a36Sopenharmony_ci	unsigned long start, end, bits, raddr;
220862306a36Sopenharmony_ci
220962306a36Sopenharmony_ci	BUG_ON(!gmap_is_shadow(sg));
221062306a36Sopenharmony_ci
221162306a36Sopenharmony_ci	spin_lock(&sg->guest_table_lock);
221262306a36Sopenharmony_ci	if (sg->removed) {
221362306a36Sopenharmony_ci		spin_unlock(&sg->guest_table_lock);
221462306a36Sopenharmony_ci		return;
221562306a36Sopenharmony_ci	}
221662306a36Sopenharmony_ci	/* Check for top level table */
221762306a36Sopenharmony_ci	start = sg->orig_asce & _ASCE_ORIGIN;
221862306a36Sopenharmony_ci	end = start + ((sg->orig_asce & _ASCE_TABLE_LENGTH) + 1) * PAGE_SIZE;
221962306a36Sopenharmony_ci	if (!(sg->orig_asce & _ASCE_REAL_SPACE) && gaddr >= start &&
222062306a36Sopenharmony_ci	    gaddr < end) {
222162306a36Sopenharmony_ci		/* The complete shadow table has to go */
222262306a36Sopenharmony_ci		gmap_unshadow(sg);
222362306a36Sopenharmony_ci		spin_unlock(&sg->guest_table_lock);
222462306a36Sopenharmony_ci		list_del(&sg->list);
222562306a36Sopenharmony_ci		gmap_put(sg);
222662306a36Sopenharmony_ci		return;
222762306a36Sopenharmony_ci	}
222862306a36Sopenharmony_ci	/* Remove the page table tree from on specific entry */
222962306a36Sopenharmony_ci	head = radix_tree_delete(&sg->host_to_rmap, vmaddr >> PAGE_SHIFT);
223062306a36Sopenharmony_ci	gmap_for_each_rmap_safe(rmap, rnext, head) {
223162306a36Sopenharmony_ci		bits = rmap->raddr & _SHADOW_RMAP_MASK;
223262306a36Sopenharmony_ci		raddr = rmap->raddr ^ bits;
223362306a36Sopenharmony_ci		switch (bits) {
223462306a36Sopenharmony_ci		case _SHADOW_RMAP_REGION1:
223562306a36Sopenharmony_ci			gmap_unshadow_r2t(sg, raddr);
223662306a36Sopenharmony_ci			break;
223762306a36Sopenharmony_ci		case _SHADOW_RMAP_REGION2:
223862306a36Sopenharmony_ci			gmap_unshadow_r3t(sg, raddr);
223962306a36Sopenharmony_ci			break;
224062306a36Sopenharmony_ci		case _SHADOW_RMAP_REGION3:
224162306a36Sopenharmony_ci			gmap_unshadow_sgt(sg, raddr);
224262306a36Sopenharmony_ci			break;
224362306a36Sopenharmony_ci		case _SHADOW_RMAP_SEGMENT:
224462306a36Sopenharmony_ci			gmap_unshadow_pgt(sg, raddr);
224562306a36Sopenharmony_ci			break;
224662306a36Sopenharmony_ci		case _SHADOW_RMAP_PGTABLE:
224762306a36Sopenharmony_ci			gmap_unshadow_page(sg, raddr);
224862306a36Sopenharmony_ci			break;
224962306a36Sopenharmony_ci		}
225062306a36Sopenharmony_ci		kfree(rmap);
225162306a36Sopenharmony_ci	}
225262306a36Sopenharmony_ci	spin_unlock(&sg->guest_table_lock);
225362306a36Sopenharmony_ci}
225462306a36Sopenharmony_ci
225562306a36Sopenharmony_ci/**
225662306a36Sopenharmony_ci * ptep_notify - call all invalidation callbacks for a specific pte.
225762306a36Sopenharmony_ci * @mm: pointer to the process mm_struct
225862306a36Sopenharmony_ci * @vmaddr: virtual address in the process address space
225962306a36Sopenharmony_ci * @pte: pointer to the page table entry
226062306a36Sopenharmony_ci * @bits: bits from the pgste that caused the notify call
226162306a36Sopenharmony_ci *
226262306a36Sopenharmony_ci * This function is assumed to be called with the page table lock held
226362306a36Sopenharmony_ci * for the pte to notify.
226462306a36Sopenharmony_ci */
226562306a36Sopenharmony_civoid ptep_notify(struct mm_struct *mm, unsigned long vmaddr,
226662306a36Sopenharmony_ci		 pte_t *pte, unsigned long bits)
226762306a36Sopenharmony_ci{
226862306a36Sopenharmony_ci	unsigned long offset, gaddr = 0;
226962306a36Sopenharmony_ci	unsigned long *table;
227062306a36Sopenharmony_ci	struct gmap *gmap, *sg, *next;
227162306a36Sopenharmony_ci
227262306a36Sopenharmony_ci	offset = ((unsigned long) pte) & (255 * sizeof(pte_t));
227362306a36Sopenharmony_ci	offset = offset * (PAGE_SIZE / sizeof(pte_t));
227462306a36Sopenharmony_ci	rcu_read_lock();
227562306a36Sopenharmony_ci	list_for_each_entry_rcu(gmap, &mm->context.gmap_list, list) {
227662306a36Sopenharmony_ci		spin_lock(&gmap->guest_table_lock);
227762306a36Sopenharmony_ci		table = radix_tree_lookup(&gmap->host_to_guest,
227862306a36Sopenharmony_ci					  vmaddr >> PMD_SHIFT);
227962306a36Sopenharmony_ci		if (table)
228062306a36Sopenharmony_ci			gaddr = __gmap_segment_gaddr(table) + offset;
228162306a36Sopenharmony_ci		spin_unlock(&gmap->guest_table_lock);
228262306a36Sopenharmony_ci		if (!table)
228362306a36Sopenharmony_ci			continue;
228462306a36Sopenharmony_ci
228562306a36Sopenharmony_ci		if (!list_empty(&gmap->children) && (bits & PGSTE_VSIE_BIT)) {
228662306a36Sopenharmony_ci			spin_lock(&gmap->shadow_lock);
228762306a36Sopenharmony_ci			list_for_each_entry_safe(sg, next,
228862306a36Sopenharmony_ci						 &gmap->children, list)
228962306a36Sopenharmony_ci				gmap_shadow_notify(sg, vmaddr, gaddr);
229062306a36Sopenharmony_ci			spin_unlock(&gmap->shadow_lock);
229162306a36Sopenharmony_ci		}
229262306a36Sopenharmony_ci		if (bits & PGSTE_IN_BIT)
229362306a36Sopenharmony_ci			gmap_call_notifier(gmap, gaddr, gaddr + PAGE_SIZE - 1);
229462306a36Sopenharmony_ci	}
229562306a36Sopenharmony_ci	rcu_read_unlock();
229662306a36Sopenharmony_ci}
229762306a36Sopenharmony_ciEXPORT_SYMBOL_GPL(ptep_notify);
229862306a36Sopenharmony_ci
229962306a36Sopenharmony_cistatic void pmdp_notify_gmap(struct gmap *gmap, pmd_t *pmdp,
230062306a36Sopenharmony_ci			     unsigned long gaddr)
230162306a36Sopenharmony_ci{
230262306a36Sopenharmony_ci	set_pmd(pmdp, clear_pmd_bit(*pmdp, __pgprot(_SEGMENT_ENTRY_GMAP_IN)));
230362306a36Sopenharmony_ci	gmap_call_notifier(gmap, gaddr, gaddr + HPAGE_SIZE - 1);
230462306a36Sopenharmony_ci}
230562306a36Sopenharmony_ci
230662306a36Sopenharmony_ci/**
230762306a36Sopenharmony_ci * gmap_pmdp_xchg - exchange a gmap pmd with another
230862306a36Sopenharmony_ci * @gmap: pointer to the guest address space structure
230962306a36Sopenharmony_ci * @pmdp: pointer to the pmd entry
231062306a36Sopenharmony_ci * @new: replacement entry
231162306a36Sopenharmony_ci * @gaddr: the affected guest address
231262306a36Sopenharmony_ci *
231362306a36Sopenharmony_ci * This function is assumed to be called with the guest_table_lock
231462306a36Sopenharmony_ci * held.
231562306a36Sopenharmony_ci */
231662306a36Sopenharmony_cistatic void gmap_pmdp_xchg(struct gmap *gmap, pmd_t *pmdp, pmd_t new,
231762306a36Sopenharmony_ci			   unsigned long gaddr)
231862306a36Sopenharmony_ci{
231962306a36Sopenharmony_ci	gaddr &= HPAGE_MASK;
232062306a36Sopenharmony_ci	pmdp_notify_gmap(gmap, pmdp, gaddr);
232162306a36Sopenharmony_ci	new = clear_pmd_bit(new, __pgprot(_SEGMENT_ENTRY_GMAP_IN));
232262306a36Sopenharmony_ci	if (MACHINE_HAS_TLB_GUEST)
232362306a36Sopenharmony_ci		__pmdp_idte(gaddr, (pmd_t *)pmdp, IDTE_GUEST_ASCE, gmap->asce,
232462306a36Sopenharmony_ci			    IDTE_GLOBAL);
232562306a36Sopenharmony_ci	else if (MACHINE_HAS_IDTE)
232662306a36Sopenharmony_ci		__pmdp_idte(gaddr, (pmd_t *)pmdp, 0, 0, IDTE_GLOBAL);
232762306a36Sopenharmony_ci	else
232862306a36Sopenharmony_ci		__pmdp_csp(pmdp);
232962306a36Sopenharmony_ci	set_pmd(pmdp, new);
233062306a36Sopenharmony_ci}
233162306a36Sopenharmony_ci
233262306a36Sopenharmony_cistatic void gmap_pmdp_clear(struct mm_struct *mm, unsigned long vmaddr,
233362306a36Sopenharmony_ci			    int purge)
233462306a36Sopenharmony_ci{
233562306a36Sopenharmony_ci	pmd_t *pmdp;
233662306a36Sopenharmony_ci	struct gmap *gmap;
233762306a36Sopenharmony_ci	unsigned long gaddr;
233862306a36Sopenharmony_ci
233962306a36Sopenharmony_ci	rcu_read_lock();
234062306a36Sopenharmony_ci	list_for_each_entry_rcu(gmap, &mm->context.gmap_list, list) {
234162306a36Sopenharmony_ci		spin_lock(&gmap->guest_table_lock);
234262306a36Sopenharmony_ci		pmdp = (pmd_t *)radix_tree_delete(&gmap->host_to_guest,
234362306a36Sopenharmony_ci						  vmaddr >> PMD_SHIFT);
234462306a36Sopenharmony_ci		if (pmdp) {
234562306a36Sopenharmony_ci			gaddr = __gmap_segment_gaddr((unsigned long *)pmdp);
234662306a36Sopenharmony_ci			pmdp_notify_gmap(gmap, pmdp, gaddr);
234762306a36Sopenharmony_ci			WARN_ON(pmd_val(*pmdp) & ~(_SEGMENT_ENTRY_HARDWARE_BITS_LARGE |
234862306a36Sopenharmony_ci						   _SEGMENT_ENTRY_GMAP_UC));
234962306a36Sopenharmony_ci			if (purge)
235062306a36Sopenharmony_ci				__pmdp_csp(pmdp);
235162306a36Sopenharmony_ci			set_pmd(pmdp, __pmd(_SEGMENT_ENTRY_EMPTY));
235262306a36Sopenharmony_ci		}
235362306a36Sopenharmony_ci		spin_unlock(&gmap->guest_table_lock);
235462306a36Sopenharmony_ci	}
235562306a36Sopenharmony_ci	rcu_read_unlock();
235662306a36Sopenharmony_ci}
235762306a36Sopenharmony_ci
/**
 * gmap_pmdp_invalidate - invalidate all affected guest pmd entries without
 *                        flushing
 * @mm: pointer to the process mm_struct
 * @vmaddr: virtual address in the process address space
 *
 * Thin wrapper around gmap_pmdp_clear() with purging disabled.
 */
void gmap_pmdp_invalidate(struct mm_struct *mm, unsigned long vmaddr)
{
	const int purge = 0;

	gmap_pmdp_clear(mm, vmaddr, purge);
}
EXPORT_SYMBOL_GPL(gmap_pmdp_invalidate);
236962306a36Sopenharmony_ci
/**
 * gmap_pmdp_csp - csp all affected guest pmd entries
 * @mm: pointer to the process mm_struct
 * @vmaddr: virtual address in the process address space
 *
 * Thin wrapper around gmap_pmdp_clear() with purging enabled.
 */
void gmap_pmdp_csp(struct mm_struct *mm, unsigned long vmaddr)
{
	const int purge = 1;

	gmap_pmdp_clear(mm, vmaddr, purge);
}
EXPORT_SYMBOL_GPL(gmap_pmdp_csp);
238062306a36Sopenharmony_ci
238162306a36Sopenharmony_ci/**
238262306a36Sopenharmony_ci * gmap_pmdp_idte_local - invalidate and clear a guest pmd entry
238362306a36Sopenharmony_ci * @mm: pointer to the process mm_struct
238462306a36Sopenharmony_ci * @vmaddr: virtual address in the process address space
238562306a36Sopenharmony_ci */
238662306a36Sopenharmony_civoid gmap_pmdp_idte_local(struct mm_struct *mm, unsigned long vmaddr)
238762306a36Sopenharmony_ci{
238862306a36Sopenharmony_ci	unsigned long *entry, gaddr;
238962306a36Sopenharmony_ci	struct gmap *gmap;
239062306a36Sopenharmony_ci	pmd_t *pmdp;
239162306a36Sopenharmony_ci
239262306a36Sopenharmony_ci	rcu_read_lock();
239362306a36Sopenharmony_ci	list_for_each_entry_rcu(gmap, &mm->context.gmap_list, list) {
239462306a36Sopenharmony_ci		spin_lock(&gmap->guest_table_lock);
239562306a36Sopenharmony_ci		entry = radix_tree_delete(&gmap->host_to_guest,
239662306a36Sopenharmony_ci					  vmaddr >> PMD_SHIFT);
239762306a36Sopenharmony_ci		if (entry) {
239862306a36Sopenharmony_ci			pmdp = (pmd_t *)entry;
239962306a36Sopenharmony_ci			gaddr = __gmap_segment_gaddr(entry);
240062306a36Sopenharmony_ci			pmdp_notify_gmap(gmap, pmdp, gaddr);
240162306a36Sopenharmony_ci			WARN_ON(*entry & ~(_SEGMENT_ENTRY_HARDWARE_BITS_LARGE |
240262306a36Sopenharmony_ci					   _SEGMENT_ENTRY_GMAP_UC));
240362306a36Sopenharmony_ci			if (MACHINE_HAS_TLB_GUEST)
240462306a36Sopenharmony_ci				__pmdp_idte(gaddr, pmdp, IDTE_GUEST_ASCE,
240562306a36Sopenharmony_ci					    gmap->asce, IDTE_LOCAL);
240662306a36Sopenharmony_ci			else if (MACHINE_HAS_IDTE)
240762306a36Sopenharmony_ci				__pmdp_idte(gaddr, pmdp, 0, 0, IDTE_LOCAL);
240862306a36Sopenharmony_ci			*entry = _SEGMENT_ENTRY_EMPTY;
240962306a36Sopenharmony_ci		}
241062306a36Sopenharmony_ci		spin_unlock(&gmap->guest_table_lock);
241162306a36Sopenharmony_ci	}
241262306a36Sopenharmony_ci	rcu_read_unlock();
241362306a36Sopenharmony_ci}
241462306a36Sopenharmony_ciEXPORT_SYMBOL_GPL(gmap_pmdp_idte_local);
241562306a36Sopenharmony_ci
241662306a36Sopenharmony_ci/**
241762306a36Sopenharmony_ci * gmap_pmdp_idte_global - invalidate and clear a guest pmd entry
241862306a36Sopenharmony_ci * @mm: pointer to the process mm_struct
241962306a36Sopenharmony_ci * @vmaddr: virtual address in the process address space
242062306a36Sopenharmony_ci */
242162306a36Sopenharmony_civoid gmap_pmdp_idte_global(struct mm_struct *mm, unsigned long vmaddr)
242262306a36Sopenharmony_ci{
242362306a36Sopenharmony_ci	unsigned long *entry, gaddr;
242462306a36Sopenharmony_ci	struct gmap *gmap;
242562306a36Sopenharmony_ci	pmd_t *pmdp;
242662306a36Sopenharmony_ci
242762306a36Sopenharmony_ci	rcu_read_lock();
242862306a36Sopenharmony_ci	list_for_each_entry_rcu(gmap, &mm->context.gmap_list, list) {
242962306a36Sopenharmony_ci		spin_lock(&gmap->guest_table_lock);
243062306a36Sopenharmony_ci		entry = radix_tree_delete(&gmap->host_to_guest,
243162306a36Sopenharmony_ci					  vmaddr >> PMD_SHIFT);
243262306a36Sopenharmony_ci		if (entry) {
243362306a36Sopenharmony_ci			pmdp = (pmd_t *)entry;
243462306a36Sopenharmony_ci			gaddr = __gmap_segment_gaddr(entry);
243562306a36Sopenharmony_ci			pmdp_notify_gmap(gmap, pmdp, gaddr);
243662306a36Sopenharmony_ci			WARN_ON(*entry & ~(_SEGMENT_ENTRY_HARDWARE_BITS_LARGE |
243762306a36Sopenharmony_ci					   _SEGMENT_ENTRY_GMAP_UC));
243862306a36Sopenharmony_ci			if (MACHINE_HAS_TLB_GUEST)
243962306a36Sopenharmony_ci				__pmdp_idte(gaddr, pmdp, IDTE_GUEST_ASCE,
244062306a36Sopenharmony_ci					    gmap->asce, IDTE_GLOBAL);
244162306a36Sopenharmony_ci			else if (MACHINE_HAS_IDTE)
244262306a36Sopenharmony_ci				__pmdp_idte(gaddr, pmdp, 0, 0, IDTE_GLOBAL);
244362306a36Sopenharmony_ci			else
244462306a36Sopenharmony_ci				__pmdp_csp(pmdp);
244562306a36Sopenharmony_ci			*entry = _SEGMENT_ENTRY_EMPTY;
244662306a36Sopenharmony_ci		}
244762306a36Sopenharmony_ci		spin_unlock(&gmap->guest_table_lock);
244862306a36Sopenharmony_ci	}
244962306a36Sopenharmony_ci	rcu_read_unlock();
245062306a36Sopenharmony_ci}
245162306a36Sopenharmony_ciEXPORT_SYMBOL_GPL(gmap_pmdp_idte_global);
245262306a36Sopenharmony_ci
245362306a36Sopenharmony_ci/**
245462306a36Sopenharmony_ci * gmap_test_and_clear_dirty_pmd - test and reset segment dirty status
245562306a36Sopenharmony_ci * @gmap: pointer to guest address space
245662306a36Sopenharmony_ci * @pmdp: pointer to the pmd to be tested
245762306a36Sopenharmony_ci * @gaddr: virtual address in the guest address space
245862306a36Sopenharmony_ci *
245962306a36Sopenharmony_ci * This function is assumed to be called with the guest_table_lock
246062306a36Sopenharmony_ci * held.
246162306a36Sopenharmony_ci */
246262306a36Sopenharmony_cistatic bool gmap_test_and_clear_dirty_pmd(struct gmap *gmap, pmd_t *pmdp,
246362306a36Sopenharmony_ci					  unsigned long gaddr)
246462306a36Sopenharmony_ci{
246562306a36Sopenharmony_ci	if (pmd_val(*pmdp) & _SEGMENT_ENTRY_INVALID)
246662306a36Sopenharmony_ci		return false;
246762306a36Sopenharmony_ci
246862306a36Sopenharmony_ci	/* Already protected memory, which did not change is clean */
246962306a36Sopenharmony_ci	if (pmd_val(*pmdp) & _SEGMENT_ENTRY_PROTECT &&
247062306a36Sopenharmony_ci	    !(pmd_val(*pmdp) & _SEGMENT_ENTRY_GMAP_UC))
247162306a36Sopenharmony_ci		return false;
247262306a36Sopenharmony_ci
247362306a36Sopenharmony_ci	/* Clear UC indication and reset protection */
247462306a36Sopenharmony_ci	set_pmd(pmdp, clear_pmd_bit(*pmdp, __pgprot(_SEGMENT_ENTRY_GMAP_UC)));
247562306a36Sopenharmony_ci	gmap_protect_pmd(gmap, gaddr, pmdp, PROT_READ, 0);
247662306a36Sopenharmony_ci	return true;
247762306a36Sopenharmony_ci}
247862306a36Sopenharmony_ci
247962306a36Sopenharmony_ci/**
248062306a36Sopenharmony_ci * gmap_sync_dirty_log_pmd - set bitmap based on dirty status of segment
248162306a36Sopenharmony_ci * @gmap: pointer to guest address space
248262306a36Sopenharmony_ci * @bitmap: dirty bitmap for this pmd
248362306a36Sopenharmony_ci * @gaddr: virtual address in the guest address space
248462306a36Sopenharmony_ci * @vmaddr: virtual address in the host address space
248562306a36Sopenharmony_ci *
248662306a36Sopenharmony_ci * This function is assumed to be called with the guest_table_lock
248762306a36Sopenharmony_ci * held.
248862306a36Sopenharmony_ci */
248962306a36Sopenharmony_civoid gmap_sync_dirty_log_pmd(struct gmap *gmap, unsigned long bitmap[4],
249062306a36Sopenharmony_ci			     unsigned long gaddr, unsigned long vmaddr)
249162306a36Sopenharmony_ci{
249262306a36Sopenharmony_ci	int i;
249362306a36Sopenharmony_ci	pmd_t *pmdp;
249462306a36Sopenharmony_ci	pte_t *ptep;
249562306a36Sopenharmony_ci	spinlock_t *ptl;
249662306a36Sopenharmony_ci
249762306a36Sopenharmony_ci	pmdp = gmap_pmd_op_walk(gmap, gaddr);
249862306a36Sopenharmony_ci	if (!pmdp)
249962306a36Sopenharmony_ci		return;
250062306a36Sopenharmony_ci
250162306a36Sopenharmony_ci	if (pmd_large(*pmdp)) {
250262306a36Sopenharmony_ci		if (gmap_test_and_clear_dirty_pmd(gmap, pmdp, gaddr))
250362306a36Sopenharmony_ci			bitmap_fill(bitmap, _PAGE_ENTRIES);
250462306a36Sopenharmony_ci	} else {
250562306a36Sopenharmony_ci		for (i = 0; i < _PAGE_ENTRIES; i++, vmaddr += PAGE_SIZE) {
250662306a36Sopenharmony_ci			ptep = pte_alloc_map_lock(gmap->mm, pmdp, vmaddr, &ptl);
250762306a36Sopenharmony_ci			if (!ptep)
250862306a36Sopenharmony_ci				continue;
250962306a36Sopenharmony_ci			if (ptep_test_and_clear_uc(gmap->mm, vmaddr, ptep))
251062306a36Sopenharmony_ci				set_bit(i, bitmap);
251162306a36Sopenharmony_ci			pte_unmap_unlock(ptep, ptl);
251262306a36Sopenharmony_ci		}
251362306a36Sopenharmony_ci	}
251462306a36Sopenharmony_ci	gmap_pmd_op_end(gmap, pmdp);
251562306a36Sopenharmony_ci}
251662306a36Sopenharmony_ciEXPORT_SYMBOL_GPL(gmap_sync_dirty_log_pmd);
251762306a36Sopenharmony_ci
251862306a36Sopenharmony_ci#ifdef CONFIG_TRANSPARENT_HUGEPAGE
251962306a36Sopenharmony_cistatic int thp_split_walk_pmd_entry(pmd_t *pmd, unsigned long addr,
252062306a36Sopenharmony_ci				    unsigned long end, struct mm_walk *walk)
252162306a36Sopenharmony_ci{
252262306a36Sopenharmony_ci	struct vm_area_struct *vma = walk->vma;
252362306a36Sopenharmony_ci
252462306a36Sopenharmony_ci	split_huge_pmd(vma, pmd, addr);
252562306a36Sopenharmony_ci	return 0;
252662306a36Sopenharmony_ci}
252762306a36Sopenharmony_ci
252862306a36Sopenharmony_cistatic const struct mm_walk_ops thp_split_walk_ops = {
252962306a36Sopenharmony_ci	.pmd_entry	= thp_split_walk_pmd_entry,
253062306a36Sopenharmony_ci	.walk_lock	= PGWALK_WRLOCK_VERIFY,
253162306a36Sopenharmony_ci};
253262306a36Sopenharmony_ci
253362306a36Sopenharmony_cistatic inline void thp_split_mm(struct mm_struct *mm)
253462306a36Sopenharmony_ci{
253562306a36Sopenharmony_ci	struct vm_area_struct *vma;
253662306a36Sopenharmony_ci	VMA_ITERATOR(vmi, mm, 0);
253762306a36Sopenharmony_ci
253862306a36Sopenharmony_ci	for_each_vma(vmi, vma) {
253962306a36Sopenharmony_ci		vm_flags_mod(vma, VM_NOHUGEPAGE, VM_HUGEPAGE);
254062306a36Sopenharmony_ci		walk_page_vma(vma, &thp_split_walk_ops, NULL);
254162306a36Sopenharmony_ci	}
254262306a36Sopenharmony_ci	mm->def_flags |= VM_NOHUGEPAGE;
254362306a36Sopenharmony_ci}
254462306a36Sopenharmony_ci#else
/* Without CONFIG_TRANSPARENT_HUGEPAGE there is nothing to split. */
static inline void thp_split_mm(struct mm_struct *mm)
{
	/* intentionally empty */
}
254862306a36Sopenharmony_ci#endif /* CONFIG_TRANSPARENT_HUGEPAGE */
254962306a36Sopenharmony_ci
255062306a36Sopenharmony_ci/*
255162306a36Sopenharmony_ci * Remove all empty zero pages from the mapping for lazy refaulting
255262306a36Sopenharmony_ci * - This must be called after mm->context.has_pgste is set, to avoid
255362306a36Sopenharmony_ci *   future creation of zero pages
255462306a36Sopenharmony_ci * - This must be called after THP was disabled.
255562306a36Sopenharmony_ci *
255662306a36Sopenharmony_ci * mm contracts with s390, that even if mm were to remove a page table,
255762306a36Sopenharmony_ci * racing with the loop below and so causing pte_offset_map_lock() to fail,
255862306a36Sopenharmony_ci * it will never insert a page table containing empty zero pages once
255962306a36Sopenharmony_ci * mm_forbids_zeropage(mm) i.e. mm->context.has_pgste is set.
256062306a36Sopenharmony_ci */
256162306a36Sopenharmony_cistatic int __zap_zero_pages(pmd_t *pmd, unsigned long start,
256262306a36Sopenharmony_ci			   unsigned long end, struct mm_walk *walk)
256362306a36Sopenharmony_ci{
256462306a36Sopenharmony_ci	unsigned long addr;
256562306a36Sopenharmony_ci
256662306a36Sopenharmony_ci	for (addr = start; addr != end; addr += PAGE_SIZE) {
256762306a36Sopenharmony_ci		pte_t *ptep;
256862306a36Sopenharmony_ci		spinlock_t *ptl;
256962306a36Sopenharmony_ci
257062306a36Sopenharmony_ci		ptep = pte_offset_map_lock(walk->mm, pmd, addr, &ptl);
257162306a36Sopenharmony_ci		if (!ptep)
257262306a36Sopenharmony_ci			break;
257362306a36Sopenharmony_ci		if (is_zero_pfn(pte_pfn(*ptep)))
257462306a36Sopenharmony_ci			ptep_xchg_direct(walk->mm, addr, ptep, __pte(_PAGE_INVALID));
257562306a36Sopenharmony_ci		pte_unmap_unlock(ptep, ptl);
257662306a36Sopenharmony_ci	}
257762306a36Sopenharmony_ci	return 0;
257862306a36Sopenharmony_ci}
257962306a36Sopenharmony_ci
258062306a36Sopenharmony_cistatic const struct mm_walk_ops zap_zero_walk_ops = {
258162306a36Sopenharmony_ci	.pmd_entry	= __zap_zero_pages,
258262306a36Sopenharmony_ci	.walk_lock	= PGWALK_WRLOCK,
258362306a36Sopenharmony_ci};
258462306a36Sopenharmony_ci
258562306a36Sopenharmony_ci/*
258662306a36Sopenharmony_ci * switch on pgstes for its userspace process (for kvm)
258762306a36Sopenharmony_ci */
258862306a36Sopenharmony_ciint s390_enable_sie(void)
258962306a36Sopenharmony_ci{
259062306a36Sopenharmony_ci	struct mm_struct *mm = current->mm;
259162306a36Sopenharmony_ci
259262306a36Sopenharmony_ci	/* Do we have pgstes? if yes, we are done */
259362306a36Sopenharmony_ci	if (mm_has_pgste(mm))
259462306a36Sopenharmony_ci		return 0;
259562306a36Sopenharmony_ci	/* Fail if the page tables are 2K */
259662306a36Sopenharmony_ci	if (!mm_alloc_pgste(mm))
259762306a36Sopenharmony_ci		return -EINVAL;
259862306a36Sopenharmony_ci	mmap_write_lock(mm);
259962306a36Sopenharmony_ci	mm->context.has_pgste = 1;
260062306a36Sopenharmony_ci	/* split thp mappings and disable thp for future mappings */
260162306a36Sopenharmony_ci	thp_split_mm(mm);
260262306a36Sopenharmony_ci	walk_page_range(mm, 0, TASK_SIZE, &zap_zero_walk_ops, NULL);
260362306a36Sopenharmony_ci	mmap_write_unlock(mm);
260462306a36Sopenharmony_ci	return 0;
260562306a36Sopenharmony_ci}
260662306a36Sopenharmony_ciEXPORT_SYMBOL_GPL(s390_enable_sie);
260762306a36Sopenharmony_ci
260862306a36Sopenharmony_ciint gmap_mark_unmergeable(void)
260962306a36Sopenharmony_ci{
261062306a36Sopenharmony_ci	/*
261162306a36Sopenharmony_ci	 * Make sure to disable KSM (if enabled for the whole process or
261262306a36Sopenharmony_ci	 * individual VMAs). Note that nothing currently hinders user space
261362306a36Sopenharmony_ci	 * from re-enabling it.
261462306a36Sopenharmony_ci	 */
261562306a36Sopenharmony_ci	return ksm_disable(current->mm);
261662306a36Sopenharmony_ci}
261762306a36Sopenharmony_ciEXPORT_SYMBOL_GPL(gmap_mark_unmergeable);
261862306a36Sopenharmony_ci
261962306a36Sopenharmony_ci/*
262062306a36Sopenharmony_ci * Enable storage key handling from now on and initialize the storage
262162306a36Sopenharmony_ci * keys with the default key.
262262306a36Sopenharmony_ci */
262362306a36Sopenharmony_cistatic int __s390_enable_skey_pte(pte_t *pte, unsigned long addr,
262462306a36Sopenharmony_ci				  unsigned long next, struct mm_walk *walk)
262562306a36Sopenharmony_ci{
262662306a36Sopenharmony_ci	/* Clear storage key */
262762306a36Sopenharmony_ci	ptep_zap_key(walk->mm, addr, pte);
262862306a36Sopenharmony_ci	return 0;
262962306a36Sopenharmony_ci}
263062306a36Sopenharmony_ci
263162306a36Sopenharmony_ci/*
263262306a36Sopenharmony_ci * Give a chance to schedule after setting a key to 256 pages.
263362306a36Sopenharmony_ci * We only hold the mm lock, which is a rwsem and the kvm srcu.
263462306a36Sopenharmony_ci * Both can sleep.
263562306a36Sopenharmony_ci */
263662306a36Sopenharmony_cistatic int __s390_enable_skey_pmd(pmd_t *pmd, unsigned long addr,
263762306a36Sopenharmony_ci				  unsigned long next, struct mm_walk *walk)
263862306a36Sopenharmony_ci{
263962306a36Sopenharmony_ci	cond_resched();
264062306a36Sopenharmony_ci	return 0;
264162306a36Sopenharmony_ci}
264262306a36Sopenharmony_ci
264362306a36Sopenharmony_cistatic int __s390_enable_skey_hugetlb(pte_t *pte, unsigned long addr,
264462306a36Sopenharmony_ci				      unsigned long hmask, unsigned long next,
264562306a36Sopenharmony_ci				      struct mm_walk *walk)
264662306a36Sopenharmony_ci{
264762306a36Sopenharmony_ci	pmd_t *pmd = (pmd_t *)pte;
264862306a36Sopenharmony_ci	unsigned long start, end;
264962306a36Sopenharmony_ci	struct page *page = pmd_page(*pmd);
265062306a36Sopenharmony_ci
265162306a36Sopenharmony_ci	/*
265262306a36Sopenharmony_ci	 * The write check makes sure we do not set a key on shared
265362306a36Sopenharmony_ci	 * memory. This is needed as the walker does not differentiate
265462306a36Sopenharmony_ci	 * between actual guest memory and the process executable or
265562306a36Sopenharmony_ci	 * shared libraries.
265662306a36Sopenharmony_ci	 */
265762306a36Sopenharmony_ci	if (pmd_val(*pmd) & _SEGMENT_ENTRY_INVALID ||
265862306a36Sopenharmony_ci	    !(pmd_val(*pmd) & _SEGMENT_ENTRY_WRITE))
265962306a36Sopenharmony_ci		return 0;
266062306a36Sopenharmony_ci
266162306a36Sopenharmony_ci	start = pmd_val(*pmd) & HPAGE_MASK;
266262306a36Sopenharmony_ci	end = start + HPAGE_SIZE - 1;
266362306a36Sopenharmony_ci	__storage_key_init_range(start, end);
266462306a36Sopenharmony_ci	set_bit(PG_arch_1, &page->flags);
266562306a36Sopenharmony_ci	cond_resched();
266662306a36Sopenharmony_ci	return 0;
266762306a36Sopenharmony_ci}
266862306a36Sopenharmony_ci
266962306a36Sopenharmony_cistatic const struct mm_walk_ops enable_skey_walk_ops = {
267062306a36Sopenharmony_ci	.hugetlb_entry		= __s390_enable_skey_hugetlb,
267162306a36Sopenharmony_ci	.pte_entry		= __s390_enable_skey_pte,
267262306a36Sopenharmony_ci	.pmd_entry		= __s390_enable_skey_pmd,
267362306a36Sopenharmony_ci	.walk_lock		= PGWALK_WRLOCK,
267462306a36Sopenharmony_ci};
267562306a36Sopenharmony_ci
267662306a36Sopenharmony_ciint s390_enable_skey(void)
267762306a36Sopenharmony_ci{
267862306a36Sopenharmony_ci	struct mm_struct *mm = current->mm;
267962306a36Sopenharmony_ci	int rc = 0;
268062306a36Sopenharmony_ci
268162306a36Sopenharmony_ci	mmap_write_lock(mm);
268262306a36Sopenharmony_ci	if (mm_uses_skeys(mm))
268362306a36Sopenharmony_ci		goto out_up;
268462306a36Sopenharmony_ci
268562306a36Sopenharmony_ci	mm->context.uses_skeys = 1;
268662306a36Sopenharmony_ci	rc = gmap_mark_unmergeable();
268762306a36Sopenharmony_ci	if (rc) {
268862306a36Sopenharmony_ci		mm->context.uses_skeys = 0;
268962306a36Sopenharmony_ci		goto out_up;
269062306a36Sopenharmony_ci	}
269162306a36Sopenharmony_ci	walk_page_range(mm, 0, TASK_SIZE, &enable_skey_walk_ops, NULL);
269262306a36Sopenharmony_ci
269362306a36Sopenharmony_ciout_up:
269462306a36Sopenharmony_ci	mmap_write_unlock(mm);
269562306a36Sopenharmony_ci	return rc;
269662306a36Sopenharmony_ci}
269762306a36Sopenharmony_ciEXPORT_SYMBOL_GPL(s390_enable_skey);
269862306a36Sopenharmony_ci
269962306a36Sopenharmony_ci/*
270062306a36Sopenharmony_ci * Reset CMMA state, make all pages stable again.
270162306a36Sopenharmony_ci */
270262306a36Sopenharmony_cistatic int __s390_reset_cmma(pte_t *pte, unsigned long addr,
270362306a36Sopenharmony_ci			     unsigned long next, struct mm_walk *walk)
270462306a36Sopenharmony_ci{
270562306a36Sopenharmony_ci	ptep_zap_unused(walk->mm, addr, pte, 1);
270662306a36Sopenharmony_ci	return 0;
270762306a36Sopenharmony_ci}
270862306a36Sopenharmony_ci
270962306a36Sopenharmony_cistatic const struct mm_walk_ops reset_cmma_walk_ops = {
271062306a36Sopenharmony_ci	.pte_entry		= __s390_reset_cmma,
271162306a36Sopenharmony_ci	.walk_lock		= PGWALK_WRLOCK,
271262306a36Sopenharmony_ci};
271362306a36Sopenharmony_ci
271462306a36Sopenharmony_civoid s390_reset_cmma(struct mm_struct *mm)
271562306a36Sopenharmony_ci{
271662306a36Sopenharmony_ci	mmap_write_lock(mm);
271762306a36Sopenharmony_ci	walk_page_range(mm, 0, TASK_SIZE, &reset_cmma_walk_ops, NULL);
271862306a36Sopenharmony_ci	mmap_write_unlock(mm);
271962306a36Sopenharmony_ci}
272062306a36Sopenharmony_ciEXPORT_SYMBOL_GPL(s390_reset_cmma);
272162306a36Sopenharmony_ci
/* Maximum number of pages gathered per page table walk. */
#define GATHER_GET_PAGES 32

/* Private state shared between the gather walk and its caller. */
struct reset_walk_state {
	/* address at which the next walk starts */
	unsigned long next;
	/* number of valid entries in @pfns */
	unsigned long count;
	/* PFNs of the pages gathered during the current walk */
	unsigned long pfns[GATHER_GET_PAGES];
};
272962306a36Sopenharmony_ci
273062306a36Sopenharmony_cistatic int s390_gather_pages(pte_t *ptep, unsigned long addr,
273162306a36Sopenharmony_ci			     unsigned long next, struct mm_walk *walk)
273262306a36Sopenharmony_ci{
273362306a36Sopenharmony_ci	struct reset_walk_state *p = walk->private;
273462306a36Sopenharmony_ci	pte_t pte = READ_ONCE(*ptep);
273562306a36Sopenharmony_ci
273662306a36Sopenharmony_ci	if (pte_present(pte)) {
273762306a36Sopenharmony_ci		/* we have a reference from the mapping, take an extra one */
273862306a36Sopenharmony_ci		get_page(phys_to_page(pte_val(pte)));
273962306a36Sopenharmony_ci		p->pfns[p->count] = phys_to_pfn(pte_val(pte));
274062306a36Sopenharmony_ci		p->next = next;
274162306a36Sopenharmony_ci		p->count++;
274262306a36Sopenharmony_ci	}
274362306a36Sopenharmony_ci	return p->count >= GATHER_GET_PAGES;
274462306a36Sopenharmony_ci}
274562306a36Sopenharmony_ci
274662306a36Sopenharmony_cistatic const struct mm_walk_ops gather_pages_ops = {
274762306a36Sopenharmony_ci	.pte_entry = s390_gather_pages,
274862306a36Sopenharmony_ci	.walk_lock = PGWALK_RDLOCK,
274962306a36Sopenharmony_ci};
275062306a36Sopenharmony_ci
/*
 * Call the Destroy secure page UVC on each page in the given array of PFNs.
 * Each page needs to have an extra reference, which will be released here.
 */
void s390_uv_destroy_pfns(unsigned long count, unsigned long *pfns)
{
	unsigned long *cur = pfns;
	unsigned long *stop = pfns + count;

	for (; cur != stop; cur++) {
		/* we always have an extra reference */
		uv_destroy_owned_page(pfn_to_phys(*cur));
		/* get rid of the extra reference */
		put_page(pfn_to_page(*cur));
		cond_resched();
	}
}
EXPORT_SYMBOL_GPL(s390_uv_destroy_pfns);
276862306a36Sopenharmony_ci
276962306a36Sopenharmony_ci/**
277062306a36Sopenharmony_ci * __s390_uv_destroy_range - Call the destroy secure page UVC on each page
277162306a36Sopenharmony_ci * in the given range of the given address space.
277262306a36Sopenharmony_ci * @mm: the mm to operate on
277362306a36Sopenharmony_ci * @start: the start of the range
277462306a36Sopenharmony_ci * @end: the end of the range
277562306a36Sopenharmony_ci * @interruptible: if not 0, stop when a fatal signal is received
277662306a36Sopenharmony_ci *
277762306a36Sopenharmony_ci * Walk the given range of the given address space and call the destroy
277862306a36Sopenharmony_ci * secure page UVC on each page. Optionally exit early if a fatal signal is
277962306a36Sopenharmony_ci * pending.
278062306a36Sopenharmony_ci *
278162306a36Sopenharmony_ci * Return: 0 on success, -EINTR if the function stopped before completing
278262306a36Sopenharmony_ci */
278362306a36Sopenharmony_ciint __s390_uv_destroy_range(struct mm_struct *mm, unsigned long start,
278462306a36Sopenharmony_ci			    unsigned long end, bool interruptible)
278562306a36Sopenharmony_ci{
278662306a36Sopenharmony_ci	struct reset_walk_state state = { .next = start };
278762306a36Sopenharmony_ci	int r = 1;
278862306a36Sopenharmony_ci
278962306a36Sopenharmony_ci	while (r > 0) {
279062306a36Sopenharmony_ci		state.count = 0;
279162306a36Sopenharmony_ci		mmap_read_lock(mm);
279262306a36Sopenharmony_ci		r = walk_page_range(mm, state.next, end, &gather_pages_ops, &state);
279362306a36Sopenharmony_ci		mmap_read_unlock(mm);
279462306a36Sopenharmony_ci		cond_resched();
279562306a36Sopenharmony_ci		s390_uv_destroy_pfns(state.count, state.pfns);
279662306a36Sopenharmony_ci		if (interruptible && fatal_signal_pending(current))
279762306a36Sopenharmony_ci			return -EINTR;
279862306a36Sopenharmony_ci	}
279962306a36Sopenharmony_ci	return 0;
280062306a36Sopenharmony_ci}
280162306a36Sopenharmony_ciEXPORT_SYMBOL_GPL(__s390_uv_destroy_range);
280262306a36Sopenharmony_ci
280362306a36Sopenharmony_ci/**
280462306a36Sopenharmony_ci * s390_unlist_old_asce - Remove the topmost level of page tables from the
280562306a36Sopenharmony_ci * list of page tables of the gmap.
280662306a36Sopenharmony_ci * @gmap: the gmap whose table is to be removed
280762306a36Sopenharmony_ci *
280862306a36Sopenharmony_ci * On s390x, KVM keeps a list of all pages containing the page tables of the
280962306a36Sopenharmony_ci * gmap (the CRST list). This list is used at tear down time to free all
281062306a36Sopenharmony_ci * pages that are now not needed anymore.
281162306a36Sopenharmony_ci *
281262306a36Sopenharmony_ci * This function removes the topmost page of the tree (the one pointed to by
281362306a36Sopenharmony_ci * the ASCE) from the CRST list.
281462306a36Sopenharmony_ci *
281562306a36Sopenharmony_ci * This means that it will not be freed when the VM is torn down, and needs
281662306a36Sopenharmony_ci * to be handled separately by the caller, unless a leak is actually
281762306a36Sopenharmony_ci * intended. Notice that this function will only remove the page from the
281862306a36Sopenharmony_ci * list, the page will still be used as a top level page table (and ASCE).
281962306a36Sopenharmony_ci */
282062306a36Sopenharmony_civoid s390_unlist_old_asce(struct gmap *gmap)
282162306a36Sopenharmony_ci{
282262306a36Sopenharmony_ci	struct page *old;
282362306a36Sopenharmony_ci
282462306a36Sopenharmony_ci	old = virt_to_page(gmap->table);
282562306a36Sopenharmony_ci	spin_lock(&gmap->guest_table_lock);
282662306a36Sopenharmony_ci	list_del(&old->lru);
282762306a36Sopenharmony_ci	/*
282862306a36Sopenharmony_ci	 * Sometimes the topmost page might need to be "removed" multiple
282962306a36Sopenharmony_ci	 * times, for example if the VM is rebooted into secure mode several
283062306a36Sopenharmony_ci	 * times concurrently, or if s390_replace_asce fails after calling
283162306a36Sopenharmony_ci	 * s390_remove_old_asce and is attempted again later. In that case
283262306a36Sopenharmony_ci	 * the old asce has been removed from the list, and therefore it
283362306a36Sopenharmony_ci	 * will not be freed when the VM terminates, but the ASCE is still
283462306a36Sopenharmony_ci	 * in use and still pointed to.
283562306a36Sopenharmony_ci	 * A subsequent call to replace_asce will follow the pointer and try
283662306a36Sopenharmony_ci	 * to remove the same page from the list again.
283762306a36Sopenharmony_ci	 * Therefore it's necessary that the page of the ASCE has valid
283862306a36Sopenharmony_ci	 * pointers, so list_del can work (and do nothing) without
283962306a36Sopenharmony_ci	 * dereferencing stale or invalid pointers.
284062306a36Sopenharmony_ci	 */
284162306a36Sopenharmony_ci	INIT_LIST_HEAD(&old->lru);
284262306a36Sopenharmony_ci	spin_unlock(&gmap->guest_table_lock);
284362306a36Sopenharmony_ci}
284462306a36Sopenharmony_ciEXPORT_SYMBOL_GPL(s390_unlist_old_asce);
284562306a36Sopenharmony_ci
284662306a36Sopenharmony_ci/**
284762306a36Sopenharmony_ci * s390_replace_asce - Try to replace the current ASCE of a gmap with a copy
284862306a36Sopenharmony_ci * @gmap: the gmap whose ASCE needs to be replaced
284962306a36Sopenharmony_ci *
285062306a36Sopenharmony_ci * If the ASCE is a SEGMENT type then this function will return -EINVAL,
285162306a36Sopenharmony_ci * otherwise the pointers in the host_to_guest radix tree will keep pointing
285262306a36Sopenharmony_ci * to the wrong pages, causing use-after-free and memory corruption.
285362306a36Sopenharmony_ci * If the allocation of the new top level page table fails, the ASCE is not
285462306a36Sopenharmony_ci * replaced.
285562306a36Sopenharmony_ci * In any case, the old ASCE is always removed from the gmap CRST list.
285662306a36Sopenharmony_ci * Therefore the caller has to make sure to save a pointer to it
285762306a36Sopenharmony_ci * beforehand, unless a leak is actually intended.
285862306a36Sopenharmony_ci */
285962306a36Sopenharmony_ciint s390_replace_asce(struct gmap *gmap)
286062306a36Sopenharmony_ci{
286162306a36Sopenharmony_ci	unsigned long asce;
286262306a36Sopenharmony_ci	struct page *page;
286362306a36Sopenharmony_ci	void *table;
286462306a36Sopenharmony_ci
286562306a36Sopenharmony_ci	s390_unlist_old_asce(gmap);
286662306a36Sopenharmony_ci
286762306a36Sopenharmony_ci	/* Replacing segment type ASCEs would cause serious issues */
286862306a36Sopenharmony_ci	if ((gmap->asce & _ASCE_TYPE_MASK) == _ASCE_TYPE_SEGMENT)
286962306a36Sopenharmony_ci		return -EINVAL;
287062306a36Sopenharmony_ci
287162306a36Sopenharmony_ci	page = gmap_alloc_crst();
287262306a36Sopenharmony_ci	if (!page)
287362306a36Sopenharmony_ci		return -ENOMEM;
287462306a36Sopenharmony_ci	page->index = 0;
287562306a36Sopenharmony_ci	table = page_to_virt(page);
287662306a36Sopenharmony_ci	memcpy(table, gmap->table, 1UL << (CRST_ALLOC_ORDER + PAGE_SHIFT));
287762306a36Sopenharmony_ci
287862306a36Sopenharmony_ci	/*
287962306a36Sopenharmony_ci	 * The caller has to deal with the old ASCE, but here we make sure
288062306a36Sopenharmony_ci	 * the new one is properly added to the CRST list, so that
288162306a36Sopenharmony_ci	 * it will be freed when the VM is torn down.
288262306a36Sopenharmony_ci	 */
288362306a36Sopenharmony_ci	spin_lock(&gmap->guest_table_lock);
288462306a36Sopenharmony_ci	list_add(&page->lru, &gmap->crst_list);
288562306a36Sopenharmony_ci	spin_unlock(&gmap->guest_table_lock);
288662306a36Sopenharmony_ci
288762306a36Sopenharmony_ci	/* Set new table origin while preserving existing ASCE control bits */
288862306a36Sopenharmony_ci	asce = (gmap->asce & ~_ASCE_ORIGIN) | __pa(table);
288962306a36Sopenharmony_ci	WRITE_ONCE(gmap->asce, asce);
289062306a36Sopenharmony_ci	WRITE_ONCE(gmap->mm->context.gmap_asce, asce);
289162306a36Sopenharmony_ci	WRITE_ONCE(gmap->table, table);
289262306a36Sopenharmony_ci
289362306a36Sopenharmony_ci	return 0;
289462306a36Sopenharmony_ci}
289562306a36Sopenharmony_ciEXPORT_SYMBOL_GPL(s390_replace_asce);
2896