// SPDX-License-Identifier: GPL-2.0

/*
 *  Handling Page Tables through page fragments
 *
 */

862306a36Sopenharmony_ci#include <linux/kernel.h>
962306a36Sopenharmony_ci#include <linux/gfp.h>
1062306a36Sopenharmony_ci#include <linux/mm.h>
1162306a36Sopenharmony_ci#include <linux/percpu.h>
1262306a36Sopenharmony_ci#include <linux/hardirq.h>
1362306a36Sopenharmony_ci#include <linux/hugetlb.h>
1462306a36Sopenharmony_ci#include <asm/pgalloc.h>
1562306a36Sopenharmony_ci#include <asm/tlbflush.h>
1662306a36Sopenharmony_ci#include <asm/tlb.h>
1762306a36Sopenharmony_ci
1862306a36Sopenharmony_civoid pte_frag_destroy(void *pte_frag)
1962306a36Sopenharmony_ci{
2062306a36Sopenharmony_ci	int count;
2162306a36Sopenharmony_ci	struct ptdesc *ptdesc;
2262306a36Sopenharmony_ci
2362306a36Sopenharmony_ci	ptdesc = virt_to_ptdesc(pte_frag);
2462306a36Sopenharmony_ci	/* drop all the pending references */
2562306a36Sopenharmony_ci	count = ((unsigned long)pte_frag & ~PAGE_MASK) >> PTE_FRAG_SIZE_SHIFT;
2662306a36Sopenharmony_ci	/* We allow PTE_FRAG_NR fragments from a PTE page */
2762306a36Sopenharmony_ci	if (atomic_sub_and_test(PTE_FRAG_NR - count, &ptdesc->pt_frag_refcount)) {
2862306a36Sopenharmony_ci		pagetable_pte_dtor(ptdesc);
2962306a36Sopenharmony_ci		pagetable_free(ptdesc);
3062306a36Sopenharmony_ci	}
3162306a36Sopenharmony_ci}
3262306a36Sopenharmony_ci
3362306a36Sopenharmony_cistatic pte_t *get_pte_from_cache(struct mm_struct *mm)
3462306a36Sopenharmony_ci{
3562306a36Sopenharmony_ci	void *pte_frag, *ret;
3662306a36Sopenharmony_ci
3762306a36Sopenharmony_ci	if (PTE_FRAG_NR == 1)
3862306a36Sopenharmony_ci		return NULL;
3962306a36Sopenharmony_ci
4062306a36Sopenharmony_ci	spin_lock(&mm->page_table_lock);
4162306a36Sopenharmony_ci	ret = pte_frag_get(&mm->context);
4262306a36Sopenharmony_ci	if (ret) {
4362306a36Sopenharmony_ci		pte_frag = ret + PTE_FRAG_SIZE;
4462306a36Sopenharmony_ci		/*
4562306a36Sopenharmony_ci		 * If we have taken up all the fragments mark PTE page NULL
4662306a36Sopenharmony_ci		 */
4762306a36Sopenharmony_ci		if (((unsigned long)pte_frag & ~PAGE_MASK) == 0)
4862306a36Sopenharmony_ci			pte_frag = NULL;
4962306a36Sopenharmony_ci		pte_frag_set(&mm->context, pte_frag);
5062306a36Sopenharmony_ci	}
5162306a36Sopenharmony_ci	spin_unlock(&mm->page_table_lock);
5262306a36Sopenharmony_ci	return (pte_t *)ret;
5362306a36Sopenharmony_ci}
5462306a36Sopenharmony_ci
5562306a36Sopenharmony_cistatic pte_t *__alloc_for_ptecache(struct mm_struct *mm, int kernel)
5662306a36Sopenharmony_ci{
5762306a36Sopenharmony_ci	void *ret = NULL;
5862306a36Sopenharmony_ci	struct ptdesc *ptdesc;
5962306a36Sopenharmony_ci
6062306a36Sopenharmony_ci	if (!kernel) {
6162306a36Sopenharmony_ci		ptdesc = pagetable_alloc(PGALLOC_GFP | __GFP_ACCOUNT, 0);
6262306a36Sopenharmony_ci		if (!ptdesc)
6362306a36Sopenharmony_ci			return NULL;
6462306a36Sopenharmony_ci		if (!pagetable_pte_ctor(ptdesc)) {
6562306a36Sopenharmony_ci			pagetable_free(ptdesc);
6662306a36Sopenharmony_ci			return NULL;
6762306a36Sopenharmony_ci		}
6862306a36Sopenharmony_ci	} else {
6962306a36Sopenharmony_ci		ptdesc = pagetable_alloc(PGALLOC_GFP, 0);
7062306a36Sopenharmony_ci		if (!ptdesc)
7162306a36Sopenharmony_ci			return NULL;
7262306a36Sopenharmony_ci	}
7362306a36Sopenharmony_ci
7462306a36Sopenharmony_ci	atomic_set(&ptdesc->pt_frag_refcount, 1);
7562306a36Sopenharmony_ci
7662306a36Sopenharmony_ci	ret = ptdesc_address(ptdesc);
7762306a36Sopenharmony_ci	/*
7862306a36Sopenharmony_ci	 * if we support only one fragment just return the
7962306a36Sopenharmony_ci	 * allocated page.
8062306a36Sopenharmony_ci	 */
8162306a36Sopenharmony_ci	if (PTE_FRAG_NR == 1)
8262306a36Sopenharmony_ci		return ret;
8362306a36Sopenharmony_ci	spin_lock(&mm->page_table_lock);
8462306a36Sopenharmony_ci	/*
8562306a36Sopenharmony_ci	 * If we find ptdesc_page set, we return
8662306a36Sopenharmony_ci	 * the allocated page with single fragment
8762306a36Sopenharmony_ci	 * count.
8862306a36Sopenharmony_ci	 */
8962306a36Sopenharmony_ci	if (likely(!pte_frag_get(&mm->context))) {
9062306a36Sopenharmony_ci		atomic_set(&ptdesc->pt_frag_refcount, PTE_FRAG_NR);
9162306a36Sopenharmony_ci		pte_frag_set(&mm->context, ret + PTE_FRAG_SIZE);
9262306a36Sopenharmony_ci	}
9362306a36Sopenharmony_ci	spin_unlock(&mm->page_table_lock);
9462306a36Sopenharmony_ci
9562306a36Sopenharmony_ci	return (pte_t *)ret;
9662306a36Sopenharmony_ci}
9762306a36Sopenharmony_ci
9862306a36Sopenharmony_cipte_t *pte_fragment_alloc(struct mm_struct *mm, int kernel)
9962306a36Sopenharmony_ci{
10062306a36Sopenharmony_ci	pte_t *pte;
10162306a36Sopenharmony_ci
10262306a36Sopenharmony_ci	pte = get_pte_from_cache(mm);
10362306a36Sopenharmony_ci	if (pte)
10462306a36Sopenharmony_ci		return pte;
10562306a36Sopenharmony_ci
10662306a36Sopenharmony_ci	return __alloc_for_ptecache(mm, kernel);
10762306a36Sopenharmony_ci}
10862306a36Sopenharmony_ci
10962306a36Sopenharmony_cistatic void pte_free_now(struct rcu_head *head)
11062306a36Sopenharmony_ci{
11162306a36Sopenharmony_ci	struct ptdesc *ptdesc;
11262306a36Sopenharmony_ci
11362306a36Sopenharmony_ci	ptdesc = container_of(head, struct ptdesc, pt_rcu_head);
11462306a36Sopenharmony_ci	pagetable_pte_dtor(ptdesc);
11562306a36Sopenharmony_ci	pagetable_free(ptdesc);
11662306a36Sopenharmony_ci}
11762306a36Sopenharmony_ci
11862306a36Sopenharmony_civoid pte_fragment_free(unsigned long *table, int kernel)
11962306a36Sopenharmony_ci{
12062306a36Sopenharmony_ci	struct ptdesc *ptdesc = virt_to_ptdesc(table);
12162306a36Sopenharmony_ci
12262306a36Sopenharmony_ci	if (pagetable_is_reserved(ptdesc))
12362306a36Sopenharmony_ci		return free_reserved_ptdesc(ptdesc);
12462306a36Sopenharmony_ci
12562306a36Sopenharmony_ci	BUG_ON(atomic_read(&ptdesc->pt_frag_refcount) <= 0);
12662306a36Sopenharmony_ci	if (atomic_dec_and_test(&ptdesc->pt_frag_refcount)) {
12762306a36Sopenharmony_ci		if (kernel)
12862306a36Sopenharmony_ci			pagetable_free(ptdesc);
12962306a36Sopenharmony_ci		else if (folio_test_clear_active(ptdesc_folio(ptdesc)))
13062306a36Sopenharmony_ci			call_rcu(&ptdesc->pt_rcu_head, pte_free_now);
13162306a36Sopenharmony_ci		else
13262306a36Sopenharmony_ci			pte_free_now(&ptdesc->pt_rcu_head);
13362306a36Sopenharmony_ci	}
13462306a36Sopenharmony_ci}
13562306a36Sopenharmony_ci
13662306a36Sopenharmony_ci#ifdef CONFIG_TRANSPARENT_HUGEPAGE
13762306a36Sopenharmony_civoid pte_free_defer(struct mm_struct *mm, pgtable_t pgtable)
13862306a36Sopenharmony_ci{
13962306a36Sopenharmony_ci	struct page *page;
14062306a36Sopenharmony_ci
14162306a36Sopenharmony_ci	page = virt_to_page(pgtable);
14262306a36Sopenharmony_ci	SetPageActive(page);
14362306a36Sopenharmony_ci	pte_fragment_free((unsigned long *)pgtable, 0);
14462306a36Sopenharmony_ci}
14562306a36Sopenharmony_ci#endif /* CONFIG_TRANSPARENT_HUGEPAGE */
146