18c2ecf20Sopenharmony_ci// SPDX-License-Identifier: GPL-2.0-or-later
28c2ecf20Sopenharmony_ci/*
38c2ecf20Sopenharmony_ci * SN Platform GRU Driver
48c2ecf20Sopenharmony_ci *
58c2ecf20Sopenharmony_ci *              FAULT HANDLER FOR GRU DETECTED TLB MISSES
68c2ecf20Sopenharmony_ci *
78c2ecf20Sopenharmony_ci * This file contains code that handles TLB misses within the GRU.
88c2ecf20Sopenharmony_ci * These misses are reported either via interrupts or user polling of
98c2ecf20Sopenharmony_ci * the user CB.
108c2ecf20Sopenharmony_ci *
118c2ecf20Sopenharmony_ci *  Copyright (c) 2008 Silicon Graphics, Inc.  All Rights Reserved.
128c2ecf20Sopenharmony_ci */
138c2ecf20Sopenharmony_ci
148c2ecf20Sopenharmony_ci#include <linux/kernel.h>
158c2ecf20Sopenharmony_ci#include <linux/errno.h>
168c2ecf20Sopenharmony_ci#include <linux/spinlock.h>
178c2ecf20Sopenharmony_ci#include <linux/mm.h>
188c2ecf20Sopenharmony_ci#include <linux/hugetlb.h>
198c2ecf20Sopenharmony_ci#include <linux/device.h>
208c2ecf20Sopenharmony_ci#include <linux/io.h>
218c2ecf20Sopenharmony_ci#include <linux/uaccess.h>
228c2ecf20Sopenharmony_ci#include <linux/security.h>
238c2ecf20Sopenharmony_ci#include <linux/sync_core.h>
248c2ecf20Sopenharmony_ci#include <linux/prefetch.h>
258c2ecf20Sopenharmony_ci#include "gru.h"
268c2ecf20Sopenharmony_ci#include "grutables.h"
278c2ecf20Sopenharmony_ci#include "grulib.h"
288c2ecf20Sopenharmony_ci#include "gru_instructions.h"
298c2ecf20Sopenharmony_ci#include <asm/uv/uv_hub.h>
308c2ecf20Sopenharmony_ci
/*
 * Return codes for gru_vtop().
 *
 * Negative values are parenthesized so that the macros expand safely
 * when they appear inside a larger expression.
 */
#define VTOP_SUCCESS               0
#define VTOP_INVALID               (-1)
#define VTOP_RETRY                 (-2)
358c2ecf20Sopenharmony_ci
368c2ecf20Sopenharmony_ci
378c2ecf20Sopenharmony_ci/*
388c2ecf20Sopenharmony_ci * Test if a physical address is a valid GRU GSEG address
398c2ecf20Sopenharmony_ci */
408c2ecf20Sopenharmony_cistatic inline int is_gru_paddr(unsigned long paddr)
418c2ecf20Sopenharmony_ci{
428c2ecf20Sopenharmony_ci	return paddr >= gru_start_paddr && paddr < gru_end_paddr;
438c2ecf20Sopenharmony_ci}
448c2ecf20Sopenharmony_ci
458c2ecf20Sopenharmony_ci/*
468c2ecf20Sopenharmony_ci * Find the vma of a GRU segment. Caller must hold mmap_lock.
478c2ecf20Sopenharmony_ci */
488c2ecf20Sopenharmony_cistruct vm_area_struct *gru_find_vma(unsigned long vaddr)
498c2ecf20Sopenharmony_ci{
508c2ecf20Sopenharmony_ci	struct vm_area_struct *vma;
518c2ecf20Sopenharmony_ci
528c2ecf20Sopenharmony_ci	vma = find_vma(current->mm, vaddr);
538c2ecf20Sopenharmony_ci	if (vma && vma->vm_start <= vaddr && vma->vm_ops == &gru_vm_ops)
548c2ecf20Sopenharmony_ci		return vma;
558c2ecf20Sopenharmony_ci	return NULL;
568c2ecf20Sopenharmony_ci}
578c2ecf20Sopenharmony_ci
588c2ecf20Sopenharmony_ci/*
598c2ecf20Sopenharmony_ci * Find and lock the gts that contains the specified user vaddr.
608c2ecf20Sopenharmony_ci *
618c2ecf20Sopenharmony_ci * Returns:
628c2ecf20Sopenharmony_ci * 	- *gts with the mmap_lock locked for read and the GTS locked.
638c2ecf20Sopenharmony_ci *	- NULL if vaddr invalid OR is not a valid GSEG vaddr.
648c2ecf20Sopenharmony_ci */
658c2ecf20Sopenharmony_ci
668c2ecf20Sopenharmony_cistatic struct gru_thread_state *gru_find_lock_gts(unsigned long vaddr)
678c2ecf20Sopenharmony_ci{
688c2ecf20Sopenharmony_ci	struct mm_struct *mm = current->mm;
698c2ecf20Sopenharmony_ci	struct vm_area_struct *vma;
708c2ecf20Sopenharmony_ci	struct gru_thread_state *gts = NULL;
718c2ecf20Sopenharmony_ci
728c2ecf20Sopenharmony_ci	mmap_read_lock(mm);
738c2ecf20Sopenharmony_ci	vma = gru_find_vma(vaddr);
748c2ecf20Sopenharmony_ci	if (vma)
758c2ecf20Sopenharmony_ci		gts = gru_find_thread_state(vma, TSID(vaddr, vma));
768c2ecf20Sopenharmony_ci	if (gts)
778c2ecf20Sopenharmony_ci		mutex_lock(&gts->ts_ctxlock);
788c2ecf20Sopenharmony_ci	else
798c2ecf20Sopenharmony_ci		mmap_read_unlock(mm);
808c2ecf20Sopenharmony_ci	return gts;
818c2ecf20Sopenharmony_ci}
828c2ecf20Sopenharmony_ci
838c2ecf20Sopenharmony_cistatic struct gru_thread_state *gru_alloc_locked_gts(unsigned long vaddr)
848c2ecf20Sopenharmony_ci{
858c2ecf20Sopenharmony_ci	struct mm_struct *mm = current->mm;
868c2ecf20Sopenharmony_ci	struct vm_area_struct *vma;
878c2ecf20Sopenharmony_ci	struct gru_thread_state *gts = ERR_PTR(-EINVAL);
888c2ecf20Sopenharmony_ci
898c2ecf20Sopenharmony_ci	mmap_write_lock(mm);
908c2ecf20Sopenharmony_ci	vma = gru_find_vma(vaddr);
918c2ecf20Sopenharmony_ci	if (!vma)
928c2ecf20Sopenharmony_ci		goto err;
938c2ecf20Sopenharmony_ci
948c2ecf20Sopenharmony_ci	gts = gru_alloc_thread_state(vma, TSID(vaddr, vma));
958c2ecf20Sopenharmony_ci	if (IS_ERR(gts))
968c2ecf20Sopenharmony_ci		goto err;
978c2ecf20Sopenharmony_ci	mutex_lock(&gts->ts_ctxlock);
988c2ecf20Sopenharmony_ci	mmap_write_downgrade(mm);
998c2ecf20Sopenharmony_ci	return gts;
1008c2ecf20Sopenharmony_ci
1018c2ecf20Sopenharmony_cierr:
1028c2ecf20Sopenharmony_ci	mmap_write_unlock(mm);
1038c2ecf20Sopenharmony_ci	return gts;
1048c2ecf20Sopenharmony_ci}
1058c2ecf20Sopenharmony_ci
1068c2ecf20Sopenharmony_ci/*
1078c2ecf20Sopenharmony_ci * Unlock a GTS that was previously locked with gru_find_lock_gts().
1088c2ecf20Sopenharmony_ci */
1098c2ecf20Sopenharmony_cistatic void gru_unlock_gts(struct gru_thread_state *gts)
1108c2ecf20Sopenharmony_ci{
1118c2ecf20Sopenharmony_ci	mutex_unlock(&gts->ts_ctxlock);
1128c2ecf20Sopenharmony_ci	mmap_read_unlock(current->mm);
1138c2ecf20Sopenharmony_ci}
1148c2ecf20Sopenharmony_ci
1158c2ecf20Sopenharmony_ci/*
1168c2ecf20Sopenharmony_ci * Set a CB.istatus to active using a user virtual address. This must be done
1178c2ecf20Sopenharmony_ci * just prior to a TFH RESTART. The new cb.istatus is an in-cache status ONLY.
1188c2ecf20Sopenharmony_ci * If the line is evicted, the status may be lost. The in-cache update
1198c2ecf20Sopenharmony_ci * is necessary to prevent the user from seeing a stale cb.istatus that will
1208c2ecf20Sopenharmony_ci * change as soon as the TFH restart is complete. Races may cause an
1218c2ecf20Sopenharmony_ci * occasional failure to clear the cb.istatus, but that is ok.
1228c2ecf20Sopenharmony_ci */
1238c2ecf20Sopenharmony_cistatic void gru_cb_set_istatus_active(struct gru_instruction_bits *cbk)
1248c2ecf20Sopenharmony_ci{
1258c2ecf20Sopenharmony_ci	if (cbk) {
1268c2ecf20Sopenharmony_ci		cbk->istatus = CBS_ACTIVE;
1278c2ecf20Sopenharmony_ci	}
1288c2ecf20Sopenharmony_ci}
1298c2ecf20Sopenharmony_ci
1308c2ecf20Sopenharmony_ci/*
1318c2ecf20Sopenharmony_ci * Read & clear a TFM
1328c2ecf20Sopenharmony_ci *
1338c2ecf20Sopenharmony_ci * The GRU has an array of fault maps. A map is private to a cpu
1348c2ecf20Sopenharmony_ci * Only one cpu will be accessing a cpu's fault map.
1358c2ecf20Sopenharmony_ci *
1368c2ecf20Sopenharmony_ci * This function scans the cpu-private fault map & clears all bits that
1378c2ecf20Sopenharmony_ci * are set. The function returns a bitmap that indicates the bits that
1388c2ecf20Sopenharmony_ci * were cleared. Note that sense the maps may be updated asynchronously by
1398c2ecf20Sopenharmony_ci * the GRU, atomic operations must be used to clear bits.
1408c2ecf20Sopenharmony_ci */
1418c2ecf20Sopenharmony_cistatic void get_clear_fault_map(struct gru_state *gru,
1428c2ecf20Sopenharmony_ci				struct gru_tlb_fault_map *imap,
1438c2ecf20Sopenharmony_ci				struct gru_tlb_fault_map *dmap)
1448c2ecf20Sopenharmony_ci{
1458c2ecf20Sopenharmony_ci	unsigned long i, k;
1468c2ecf20Sopenharmony_ci	struct gru_tlb_fault_map *tfm;
1478c2ecf20Sopenharmony_ci
1488c2ecf20Sopenharmony_ci	tfm = get_tfm_for_cpu(gru, gru_cpu_fault_map_id());
1498c2ecf20Sopenharmony_ci	prefetchw(tfm);		/* Helps on hardware, required for emulator */
1508c2ecf20Sopenharmony_ci	for (i = 0; i < BITS_TO_LONGS(GRU_NUM_CBE); i++) {
1518c2ecf20Sopenharmony_ci		k = tfm->fault_bits[i];
1528c2ecf20Sopenharmony_ci		if (k)
1538c2ecf20Sopenharmony_ci			k = xchg(&tfm->fault_bits[i], 0UL);
1548c2ecf20Sopenharmony_ci		imap->fault_bits[i] = k;
1558c2ecf20Sopenharmony_ci		k = tfm->done_bits[i];
1568c2ecf20Sopenharmony_ci		if (k)
1578c2ecf20Sopenharmony_ci			k = xchg(&tfm->done_bits[i], 0UL);
1588c2ecf20Sopenharmony_ci		dmap->fault_bits[i] = k;
1598c2ecf20Sopenharmony_ci	}
1608c2ecf20Sopenharmony_ci
1618c2ecf20Sopenharmony_ci	/*
1628c2ecf20Sopenharmony_ci	 * Not functionally required but helps performance. (Required
1638c2ecf20Sopenharmony_ci	 * on emulator)
1648c2ecf20Sopenharmony_ci	 */
1658c2ecf20Sopenharmony_ci	gru_flush_cache(tfm);
1668c2ecf20Sopenharmony_ci}
1678c2ecf20Sopenharmony_ci
1688c2ecf20Sopenharmony_ci/*
1698c2ecf20Sopenharmony_ci * Atomic (interrupt context) & non-atomic (user context) functions to
1708c2ecf20Sopenharmony_ci * convert a vaddr into a physical address. The size of the page
1718c2ecf20Sopenharmony_ci * is returned in pageshift.
1728c2ecf20Sopenharmony_ci * 	returns:
1738c2ecf20Sopenharmony_ci * 		  0 - successful
1748c2ecf20Sopenharmony_ci * 		< 0 - error code
1758c2ecf20Sopenharmony_ci * 		  1 - (atomic only) try again in non-atomic context
1768c2ecf20Sopenharmony_ci */
1778c2ecf20Sopenharmony_cistatic int non_atomic_pte_lookup(struct vm_area_struct *vma,
1788c2ecf20Sopenharmony_ci				 unsigned long vaddr, int write,
1798c2ecf20Sopenharmony_ci				 unsigned long *paddr, int *pageshift)
1808c2ecf20Sopenharmony_ci{
1818c2ecf20Sopenharmony_ci	struct page *page;
1828c2ecf20Sopenharmony_ci
1838c2ecf20Sopenharmony_ci#ifdef CONFIG_HUGETLB_PAGE
1848c2ecf20Sopenharmony_ci	*pageshift = is_vm_hugetlb_page(vma) ? HPAGE_SHIFT : PAGE_SHIFT;
1858c2ecf20Sopenharmony_ci#else
1868c2ecf20Sopenharmony_ci	*pageshift = PAGE_SHIFT;
1878c2ecf20Sopenharmony_ci#endif
1888c2ecf20Sopenharmony_ci	if (get_user_pages(vaddr, 1, write ? FOLL_WRITE : 0, &page, NULL) <= 0)
1898c2ecf20Sopenharmony_ci		return -EFAULT;
1908c2ecf20Sopenharmony_ci	*paddr = page_to_phys(page);
1918c2ecf20Sopenharmony_ci	put_page(page);
1928c2ecf20Sopenharmony_ci	return 0;
1938c2ecf20Sopenharmony_ci}
1948c2ecf20Sopenharmony_ci
1958c2ecf20Sopenharmony_ci/*
1968c2ecf20Sopenharmony_ci * atomic_pte_lookup
1978c2ecf20Sopenharmony_ci *
1988c2ecf20Sopenharmony_ci * Convert a user virtual address to a physical address
1998c2ecf20Sopenharmony_ci * Only supports Intel large pages (2MB only) on x86_64.
2008c2ecf20Sopenharmony_ci *	ZZZ - hugepage support is incomplete
2018c2ecf20Sopenharmony_ci *
2028c2ecf20Sopenharmony_ci * NOTE: mmap_lock is already held on entry to this function. This
2038c2ecf20Sopenharmony_ci * guarantees existence of the page tables.
2048c2ecf20Sopenharmony_ci */
2058c2ecf20Sopenharmony_cistatic int atomic_pte_lookup(struct vm_area_struct *vma, unsigned long vaddr,
2068c2ecf20Sopenharmony_ci	int write, unsigned long *paddr, int *pageshift)
2078c2ecf20Sopenharmony_ci{
2088c2ecf20Sopenharmony_ci	pgd_t *pgdp;
2098c2ecf20Sopenharmony_ci	p4d_t *p4dp;
2108c2ecf20Sopenharmony_ci	pud_t *pudp;
2118c2ecf20Sopenharmony_ci	pmd_t *pmdp;
2128c2ecf20Sopenharmony_ci	pte_t pte;
2138c2ecf20Sopenharmony_ci
2148c2ecf20Sopenharmony_ci	pgdp = pgd_offset(vma->vm_mm, vaddr);
2158c2ecf20Sopenharmony_ci	if (unlikely(pgd_none(*pgdp)))
2168c2ecf20Sopenharmony_ci		goto err;
2178c2ecf20Sopenharmony_ci
2188c2ecf20Sopenharmony_ci	p4dp = p4d_offset(pgdp, vaddr);
2198c2ecf20Sopenharmony_ci	if (unlikely(p4d_none(*p4dp)))
2208c2ecf20Sopenharmony_ci		goto err;
2218c2ecf20Sopenharmony_ci
2228c2ecf20Sopenharmony_ci	pudp = pud_offset(p4dp, vaddr);
2238c2ecf20Sopenharmony_ci	if (unlikely(pud_none(*pudp)))
2248c2ecf20Sopenharmony_ci		goto err;
2258c2ecf20Sopenharmony_ci
2268c2ecf20Sopenharmony_ci	pmdp = pmd_offset(pudp, vaddr);
2278c2ecf20Sopenharmony_ci	if (unlikely(pmd_none(*pmdp)))
2288c2ecf20Sopenharmony_ci		goto err;
2298c2ecf20Sopenharmony_ci#ifdef CONFIG_X86_64
2308c2ecf20Sopenharmony_ci	if (unlikely(pmd_large(*pmdp)))
2318c2ecf20Sopenharmony_ci		pte = *(pte_t *) pmdp;
2328c2ecf20Sopenharmony_ci	else
2338c2ecf20Sopenharmony_ci#endif
2348c2ecf20Sopenharmony_ci		pte = *pte_offset_kernel(pmdp, vaddr);
2358c2ecf20Sopenharmony_ci
2368c2ecf20Sopenharmony_ci	if (unlikely(!pte_present(pte) ||
2378c2ecf20Sopenharmony_ci		     (write && (!pte_write(pte) || !pte_dirty(pte)))))
2388c2ecf20Sopenharmony_ci		return 1;
2398c2ecf20Sopenharmony_ci
2408c2ecf20Sopenharmony_ci	*paddr = pte_pfn(pte) << PAGE_SHIFT;
2418c2ecf20Sopenharmony_ci#ifdef CONFIG_HUGETLB_PAGE
2428c2ecf20Sopenharmony_ci	*pageshift = is_vm_hugetlb_page(vma) ? HPAGE_SHIFT : PAGE_SHIFT;
2438c2ecf20Sopenharmony_ci#else
2448c2ecf20Sopenharmony_ci	*pageshift = PAGE_SHIFT;
2458c2ecf20Sopenharmony_ci#endif
2468c2ecf20Sopenharmony_ci	return 0;
2478c2ecf20Sopenharmony_ci
2488c2ecf20Sopenharmony_cierr:
2498c2ecf20Sopenharmony_ci	return 1;
2508c2ecf20Sopenharmony_ci}
2518c2ecf20Sopenharmony_ci
2528c2ecf20Sopenharmony_cistatic int gru_vtop(struct gru_thread_state *gts, unsigned long vaddr,
2538c2ecf20Sopenharmony_ci		    int write, int atomic, unsigned long *gpa, int *pageshift)
2548c2ecf20Sopenharmony_ci{
2558c2ecf20Sopenharmony_ci	struct mm_struct *mm = gts->ts_mm;
2568c2ecf20Sopenharmony_ci	struct vm_area_struct *vma;
2578c2ecf20Sopenharmony_ci	unsigned long paddr;
2588c2ecf20Sopenharmony_ci	int ret, ps;
2598c2ecf20Sopenharmony_ci
2608c2ecf20Sopenharmony_ci	vma = find_vma(mm, vaddr);
2618c2ecf20Sopenharmony_ci	if (!vma)
2628c2ecf20Sopenharmony_ci		goto inval;
2638c2ecf20Sopenharmony_ci
2648c2ecf20Sopenharmony_ci	/*
2658c2ecf20Sopenharmony_ci	 * Atomic lookup is faster & usually works even if called in non-atomic
2668c2ecf20Sopenharmony_ci	 * context.
2678c2ecf20Sopenharmony_ci	 */
2688c2ecf20Sopenharmony_ci	rmb();	/* Must/check ms_range_active before loading PTEs */
2698c2ecf20Sopenharmony_ci	ret = atomic_pte_lookup(vma, vaddr, write, &paddr, &ps);
2708c2ecf20Sopenharmony_ci	if (ret) {
2718c2ecf20Sopenharmony_ci		if (atomic)
2728c2ecf20Sopenharmony_ci			goto upm;
2738c2ecf20Sopenharmony_ci		if (non_atomic_pte_lookup(vma, vaddr, write, &paddr, &ps))
2748c2ecf20Sopenharmony_ci			goto inval;
2758c2ecf20Sopenharmony_ci	}
2768c2ecf20Sopenharmony_ci	if (is_gru_paddr(paddr))
2778c2ecf20Sopenharmony_ci		goto inval;
2788c2ecf20Sopenharmony_ci	paddr = paddr & ~((1UL << ps) - 1);
2798c2ecf20Sopenharmony_ci	*gpa = uv_soc_phys_ram_to_gpa(paddr);
2808c2ecf20Sopenharmony_ci	*pageshift = ps;
2818c2ecf20Sopenharmony_ci	return VTOP_SUCCESS;
2828c2ecf20Sopenharmony_ci
2838c2ecf20Sopenharmony_ciinval:
2848c2ecf20Sopenharmony_ci	return VTOP_INVALID;
2858c2ecf20Sopenharmony_ciupm:
2868c2ecf20Sopenharmony_ci	return VTOP_RETRY;
2878c2ecf20Sopenharmony_ci}
2888c2ecf20Sopenharmony_ci
2898c2ecf20Sopenharmony_ci
2908c2ecf20Sopenharmony_ci/*
2918c2ecf20Sopenharmony_ci * Flush a CBE from cache. The CBE is clean in the cache. Dirty the
2928c2ecf20Sopenharmony_ci * CBE cacheline so that the line will be written back to home agent.
2938c2ecf20Sopenharmony_ci * Otherwise the line may be silently dropped. This has no impact
2948c2ecf20Sopenharmony_ci * except on performance.
2958c2ecf20Sopenharmony_ci */
2968c2ecf20Sopenharmony_cistatic void gru_flush_cache_cbe(struct gru_control_block_extended *cbe)
2978c2ecf20Sopenharmony_ci{
2988c2ecf20Sopenharmony_ci	if (unlikely(cbe)) {
2998c2ecf20Sopenharmony_ci		cbe->cbrexecstatus = 0;         /* make CL dirty */
3008c2ecf20Sopenharmony_ci		gru_flush_cache(cbe);
3018c2ecf20Sopenharmony_ci	}
3028c2ecf20Sopenharmony_ci}
3038c2ecf20Sopenharmony_ci
3048c2ecf20Sopenharmony_ci/*
3058c2ecf20Sopenharmony_ci * Preload the TLB with entries that may be required. Currently, preloading
3068c2ecf20Sopenharmony_ci * is implemented only for BCOPY. Preload  <tlb_preload_count> pages OR to
3078c2ecf20Sopenharmony_ci * the end of the bcopy tranfer, whichever is smaller.
3088c2ecf20Sopenharmony_ci */
3098c2ecf20Sopenharmony_cistatic void gru_preload_tlb(struct gru_state *gru,
3108c2ecf20Sopenharmony_ci			struct gru_thread_state *gts, int atomic,
3118c2ecf20Sopenharmony_ci			unsigned long fault_vaddr, int asid, int write,
3128c2ecf20Sopenharmony_ci			unsigned char tlb_preload_count,
3138c2ecf20Sopenharmony_ci			struct gru_tlb_fault_handle *tfh,
3148c2ecf20Sopenharmony_ci			struct gru_control_block_extended *cbe)
3158c2ecf20Sopenharmony_ci{
3168c2ecf20Sopenharmony_ci	unsigned long vaddr = 0, gpa;
3178c2ecf20Sopenharmony_ci	int ret, pageshift;
3188c2ecf20Sopenharmony_ci
3198c2ecf20Sopenharmony_ci	if (cbe->opccpy != OP_BCOPY)
3208c2ecf20Sopenharmony_ci		return;
3218c2ecf20Sopenharmony_ci
3228c2ecf20Sopenharmony_ci	if (fault_vaddr == cbe->cbe_baddr0)
3238c2ecf20Sopenharmony_ci		vaddr = fault_vaddr + GRU_CACHE_LINE_BYTES * cbe->cbe_src_cl - 1;
3248c2ecf20Sopenharmony_ci	else if (fault_vaddr == cbe->cbe_baddr1)
3258c2ecf20Sopenharmony_ci		vaddr = fault_vaddr + (1 << cbe->xtypecpy) * cbe->cbe_nelemcur - 1;
3268c2ecf20Sopenharmony_ci
3278c2ecf20Sopenharmony_ci	fault_vaddr &= PAGE_MASK;
3288c2ecf20Sopenharmony_ci	vaddr &= PAGE_MASK;
3298c2ecf20Sopenharmony_ci	vaddr = min(vaddr, fault_vaddr + tlb_preload_count * PAGE_SIZE);
3308c2ecf20Sopenharmony_ci
3318c2ecf20Sopenharmony_ci	while (vaddr > fault_vaddr) {
3328c2ecf20Sopenharmony_ci		ret = gru_vtop(gts, vaddr, write, atomic, &gpa, &pageshift);
3338c2ecf20Sopenharmony_ci		if (ret || tfh_write_only(tfh, gpa, GAA_RAM, vaddr, asid, write,
3348c2ecf20Sopenharmony_ci					  GRU_PAGESIZE(pageshift)))
3358c2ecf20Sopenharmony_ci			return;
3368c2ecf20Sopenharmony_ci		gru_dbg(grudev,
3378c2ecf20Sopenharmony_ci			"%s: gid %d, gts 0x%p, tfh 0x%p, vaddr 0x%lx, asid 0x%x, rw %d, ps %d, gpa 0x%lx\n",
3388c2ecf20Sopenharmony_ci			atomic ? "atomic" : "non-atomic", gru->gs_gid, gts, tfh,
3398c2ecf20Sopenharmony_ci			vaddr, asid, write, pageshift, gpa);
3408c2ecf20Sopenharmony_ci		vaddr -= PAGE_SIZE;
3418c2ecf20Sopenharmony_ci		STAT(tlb_preload_page);
3428c2ecf20Sopenharmony_ci	}
3438c2ecf20Sopenharmony_ci}
3448c2ecf20Sopenharmony_ci
/*
 * Drop a TLB entry into the GRU. The fault is described by info in an TFH.
 *	Input:
 *		gru   GRU state; read here for gs_gid in debug output and
 *		      passed on to gru_preload_tlb()
 *		gts   thread state of the context that took the miss
 *		tfh   TLB fault handle describing the miss
 *		cbk   Address of user CBR. Null if not running in user context
 *		      (a NULL cbk selects the atomic lookup path)
 * 	Return:
 * 		  0 = dropin done, or nothing to do (no exception pending,
 * 		      TFH idle, or FMM state seen on a UPM call)
 * 		  1 = range invalidate active, or the CBR was switched to
 * 		      user polling mode (failupm)
 * 		< 0 = error code (-EAGAIN: no asid, -EFAULT: invalid address)
 *
 */
static int gru_try_dropin(struct gru_state *gru,
			  struct gru_thread_state *gts,
			  struct gru_tlb_fault_handle *tfh,
			  struct gru_instruction_bits *cbk)
{
	struct gru_control_block_extended *cbe = NULL;
	unsigned char tlb_preload_count = gts->ts_tlb_preload_count;
	/* atomic is derived from cbk: no user CB means atomic lookups only */
	int pageshift = 0, asid, write, ret, atomic = !cbk, indexway;
	unsigned long gpa = 0, vaddr = 0;

	/*
	 * NOTE: The GRU contains magic hardware that eliminates races between
	 * TLB invalidates and TLB dropins. If an invalidate occurs
	 * in the window between reading the TFH and the subsequent TLB dropin,
	 * the dropin is ignored. This eliminates the need for additional locks.
	 */

	/*
	 * Prefetch the CBE if doing TLB preloading. cbe stays NULL otherwise,
	 * which turns every gru_flush_cache_cbe(cbe) below into a no-op.
	 */
	if (unlikely(tlb_preload_count)) {
		cbe = gru_tfh_to_cbe(tfh);
		prefetchw(cbe);
	}

	/*
	 * Error if TFH state is IDLE or FMM mode & the user issuing a UPM call.
	 * Might be a hardware race OR a stupid user. Ignore FMM because FMM
	 * is a transient state.
	 */
	if (tfh->status != TFHSTATUS_EXCEPTION) {
		/*
		 * The status read may have been stale: flush the TFH and
		 * re-read it once before concluding no exception is pending.
		 */
		gru_flush_cache(tfh);
		sync_core();
		if (tfh->status != TFHSTATUS_EXCEPTION)
			goto failnoexception;
		STAT(tfh_stale_on_fault);
	}
	if (tfh->state == TFHSTATE_IDLE)
		goto failidle;
	if (tfh->state == TFHSTATE_MISS_FMM && cbk)
		goto failfmm;

	/* Capture the description of the miss from the TFH */
	write = (tfh->cause & TFHCAUSE_TLB_MOD) != 0;
	vaddr = tfh->missvaddr;
	asid = tfh->missasid;
	indexway = tfh->indexway;	/* only used in the debug message */
	if (asid == 0)
		goto failnoasid;

	rmb();	/* TFH must be cache resident before reading ms_range_active */

	/*
	 * TFH is cache resident - at least briefly. Fail the dropin
	 * if a range invalidate is active.
	 */
	if (atomic_read(&gts->ts_gms->ms_range_active))
		goto failactive;

	ret = gru_vtop(gts, vaddr, write, atomic, &gpa, &pageshift);
	if (ret == VTOP_INVALID)
		goto failinval;
	if (ret == VTOP_RETRY)
		goto failupm;

	/*
	 * First time this context sees this page size: record it in
	 * ts_sizeavail. In atomic context, or when gru_update_cch() returns
	 * 0, flag a forced CCH reload and fall back to user polling mode.
	 * NOTE(review): gru_update_cch() is defined elsewhere; presumably it
	 * pushes the new size mask to the loaded context - confirm.
	 */
	if (!(gts->ts_sizeavail & GRU_SIZEAVAIL(pageshift))) {
		gts->ts_sizeavail |= GRU_SIZEAVAIL(pageshift);
		if (atomic || !gru_update_cch(gts)) {
			gts->ts_force_cch_reload = 1;
			goto failupm;
		}
	}

	/* Preloading is attempted only for base-size (PAGE_SHIFT) pages */
	if (unlikely(cbe) && pageshift == PAGE_SHIFT) {
		gru_preload_tlb(gru, gts, atomic, vaddr, asid, write, tlb_preload_count, tfh, cbe);
		gru_flush_cache_cbe(cbe);
	}

	/* Mark the user CB active (no-op if cbk is NULL), then restart */
	gru_cb_set_istatus_active(cbk);
	gts->ustats.tlbdropin++;
	tfh_write_restart(tfh, gpa, GAA_RAM, vaddr, asid, write,
			  GRU_PAGESIZE(pageshift));
	gru_dbg(grudev,
		"%s: gid %d, gts 0x%p, tfh 0x%p, vaddr 0x%lx, asid 0x%x, indexway 0x%x,"
		" rw %d, ps %d, gpa 0x%lx\n",
		atomic ? "atomic" : "non-atomic", gru->gs_gid, gts, tfh, vaddr, asid,
		indexway, write, pageshift, gpa);
	STAT(tlb_dropin);
	return 0;

failnoasid:
	/* No asid (delayed unload). */
	STAT(tlb_dropin_fail_no_asid);
	gru_dbg(grudev, "FAILED no_asid tfh: 0x%p, vaddr 0x%lx\n", tfh, vaddr);
	/* Atomic caller: hand the fault to user polling; otherwise just flush */
	if (!cbk)
		tfh_user_polling_mode(tfh);
	else
		gru_flush_cache(tfh);
	gru_flush_cache_cbe(cbe);
	return -EAGAIN;

failupm:
	/* Atomic failure switch CBR to UPM */
	tfh_user_polling_mode(tfh);
	gru_flush_cache_cbe(cbe);
	STAT(tlb_dropin_fail_upm);
	gru_dbg(grudev, "FAILED upm tfh: 0x%p, vaddr 0x%lx\n", tfh, vaddr);
	return 1;

failfmm:
	/* FMM state on UPM call */
	gru_flush_cache(tfh);
	gru_flush_cache_cbe(cbe);
	STAT(tlb_dropin_fail_fmm);
	gru_dbg(grudev, "FAILED fmm tfh: 0x%p, state %d\n", tfh, tfh->state);
	return 0;

failnoexception:
	/* TFH status did not show exception pending */
	gru_flush_cache(tfh);
	gru_flush_cache_cbe(cbe);
	if (cbk)
		gru_flush_cache(cbk);
	STAT(tlb_dropin_fail_no_exception);
	gru_dbg(grudev, "FAILED non-exception tfh: 0x%p, status %d, state %d\n",
		tfh, tfh->status, tfh->state);
	return 0;

failidle:
	/* TFH state was idle  - no miss pending */
	gru_flush_cache(tfh);
	gru_flush_cache_cbe(cbe);
	if (cbk)
		gru_flush_cache(cbk);
	STAT(tlb_dropin_fail_idle);
	gru_dbg(grudev, "FAILED idle tfh: 0x%p, state %d\n", tfh, tfh->state);
	return 0;

failinval:
	/* All errors (atomic & non-atomic) switch CBR to EXCEPTION state */
	tfh_exception(tfh);
	gru_flush_cache_cbe(cbe);
	STAT(tlb_dropin_fail_invalid);
	gru_dbg(grudev, "FAILED inval tfh: 0x%p, vaddr 0x%lx\n", tfh, vaddr);
	return -EFAULT;

failactive:
	/* Range invalidate active. Switch to UPM iff atomic */
	if (!cbk)
		tfh_user_polling_mode(tfh);
	else
		gru_flush_cache(tfh);
	gru_flush_cache_cbe(cbe);
	STAT(tlb_dropin_fail_range_active);
	gru_dbg(grudev, "FAILED range active: tfh 0x%p, vaddr 0x%lx\n",
		tfh, vaddr);
	return 1;
}
5128c2ecf20Sopenharmony_ci
5138c2ecf20Sopenharmony_ci/*
5148c2ecf20Sopenharmony_ci * Process an external interrupt from the GRU. This interrupt is
5158c2ecf20Sopenharmony_ci * caused by a TLB miss.
5168c2ecf20Sopenharmony_ci * Note that this is the interrupt handler that is registered with linux
5178c2ecf20Sopenharmony_ci * interrupt handlers.
5188c2ecf20Sopenharmony_ci */
5198c2ecf20Sopenharmony_cistatic irqreturn_t gru_intr(int chiplet, int blade)
5208c2ecf20Sopenharmony_ci{
5218c2ecf20Sopenharmony_ci	struct gru_state *gru;
5228c2ecf20Sopenharmony_ci	struct gru_tlb_fault_map imap, dmap;
5238c2ecf20Sopenharmony_ci	struct gru_thread_state *gts;
5248c2ecf20Sopenharmony_ci	struct gru_tlb_fault_handle *tfh = NULL;
5258c2ecf20Sopenharmony_ci	struct completion *cmp;
5268c2ecf20Sopenharmony_ci	int cbrnum, ctxnum;
5278c2ecf20Sopenharmony_ci
5288c2ecf20Sopenharmony_ci	STAT(intr);
5298c2ecf20Sopenharmony_ci
5308c2ecf20Sopenharmony_ci	gru = &gru_base[blade]->bs_grus[chiplet];
5318c2ecf20Sopenharmony_ci	if (!gru) {
5328c2ecf20Sopenharmony_ci		dev_err(grudev, "GRU: invalid interrupt: cpu %d, chiplet %d\n",
5338c2ecf20Sopenharmony_ci			raw_smp_processor_id(), chiplet);
5348c2ecf20Sopenharmony_ci		return IRQ_NONE;
5358c2ecf20Sopenharmony_ci	}
5368c2ecf20Sopenharmony_ci	get_clear_fault_map(gru, &imap, &dmap);
5378c2ecf20Sopenharmony_ci	gru_dbg(grudev,
5388c2ecf20Sopenharmony_ci		"cpu %d, chiplet %d, gid %d, imap %016lx %016lx, dmap %016lx %016lx\n",
5398c2ecf20Sopenharmony_ci		smp_processor_id(), chiplet, gru->gs_gid,
5408c2ecf20Sopenharmony_ci		imap.fault_bits[0], imap.fault_bits[1],
5418c2ecf20Sopenharmony_ci		dmap.fault_bits[0], dmap.fault_bits[1]);
5428c2ecf20Sopenharmony_ci
5438c2ecf20Sopenharmony_ci	for_each_cbr_in_tfm(cbrnum, dmap.fault_bits) {
5448c2ecf20Sopenharmony_ci		STAT(intr_cbr);
5458c2ecf20Sopenharmony_ci		cmp = gru->gs_blade->bs_async_wq;
5468c2ecf20Sopenharmony_ci		if (cmp)
5478c2ecf20Sopenharmony_ci			complete(cmp);
5488c2ecf20Sopenharmony_ci		gru_dbg(grudev, "gid %d, cbr_done %d, done %d\n",
5498c2ecf20Sopenharmony_ci			gru->gs_gid, cbrnum, cmp ? cmp->done : -1);
5508c2ecf20Sopenharmony_ci	}
5518c2ecf20Sopenharmony_ci
5528c2ecf20Sopenharmony_ci	for_each_cbr_in_tfm(cbrnum, imap.fault_bits) {
5538c2ecf20Sopenharmony_ci		STAT(intr_tfh);
5548c2ecf20Sopenharmony_ci		tfh = get_tfh_by_index(gru, cbrnum);
5558c2ecf20Sopenharmony_ci		prefetchw(tfh);	/* Helps on hdw, required for emulator */
5568c2ecf20Sopenharmony_ci
5578c2ecf20Sopenharmony_ci		/*
5588c2ecf20Sopenharmony_ci		 * When hardware sets a bit in the faultmap, it implicitly
5598c2ecf20Sopenharmony_ci		 * locks the GRU context so that it cannot be unloaded.
5608c2ecf20Sopenharmony_ci		 * The gts cannot change until a TFH start/writestart command
5618c2ecf20Sopenharmony_ci		 * is issued.
5628c2ecf20Sopenharmony_ci		 */
5638c2ecf20Sopenharmony_ci		ctxnum = tfh->ctxnum;
5648c2ecf20Sopenharmony_ci		gts = gru->gs_gts[ctxnum];
5658c2ecf20Sopenharmony_ci
5668c2ecf20Sopenharmony_ci		/* Spurious interrupts can cause this. Ignore. */
5678c2ecf20Sopenharmony_ci		if (!gts) {
5688c2ecf20Sopenharmony_ci			STAT(intr_spurious);
5698c2ecf20Sopenharmony_ci			continue;
5708c2ecf20Sopenharmony_ci		}
5718c2ecf20Sopenharmony_ci
5728c2ecf20Sopenharmony_ci		/*
5738c2ecf20Sopenharmony_ci		 * This is running in interrupt context. Trylock the mmap_lock.
5748c2ecf20Sopenharmony_ci		 * If it fails, retry the fault in user context.
5758c2ecf20Sopenharmony_ci		 */
5768c2ecf20Sopenharmony_ci		gts->ustats.fmm_tlbmiss++;
5778c2ecf20Sopenharmony_ci		if (!gts->ts_force_cch_reload &&
5788c2ecf20Sopenharmony_ci					mmap_read_trylock(gts->ts_mm)) {
5798c2ecf20Sopenharmony_ci			gru_try_dropin(gru, gts, tfh, NULL);
5808c2ecf20Sopenharmony_ci			mmap_read_unlock(gts->ts_mm);
5818c2ecf20Sopenharmony_ci		} else {
5828c2ecf20Sopenharmony_ci			tfh_user_polling_mode(tfh);
5838c2ecf20Sopenharmony_ci			STAT(intr_mm_lock_failed);
5848c2ecf20Sopenharmony_ci		}
5858c2ecf20Sopenharmony_ci	}
5868c2ecf20Sopenharmony_ci	return IRQ_HANDLED;
5878c2ecf20Sopenharmony_ci}
5888c2ecf20Sopenharmony_ci
5898c2ecf20Sopenharmony_ciirqreturn_t gru0_intr(int irq, void *dev_id)
5908c2ecf20Sopenharmony_ci{
5918c2ecf20Sopenharmony_ci	return gru_intr(0, uv_numa_blade_id());
5928c2ecf20Sopenharmony_ci}
5938c2ecf20Sopenharmony_ci
5948c2ecf20Sopenharmony_ciirqreturn_t gru1_intr(int irq, void *dev_id)
5958c2ecf20Sopenharmony_ci{
5968c2ecf20Sopenharmony_ci	return gru_intr(1, uv_numa_blade_id());
5978c2ecf20Sopenharmony_ci}
5988c2ecf20Sopenharmony_ci
5998c2ecf20Sopenharmony_ciirqreturn_t gru_intr_mblade(int irq, void *dev_id)
6008c2ecf20Sopenharmony_ci{
6018c2ecf20Sopenharmony_ci	int blade;
6028c2ecf20Sopenharmony_ci
6038c2ecf20Sopenharmony_ci	for_each_possible_blade(blade) {
6048c2ecf20Sopenharmony_ci		if (uv_blade_nr_possible_cpus(blade))
6058c2ecf20Sopenharmony_ci			continue;
6068c2ecf20Sopenharmony_ci		gru_intr(0, blade);
6078c2ecf20Sopenharmony_ci		gru_intr(1, blade);
6088c2ecf20Sopenharmony_ci	}
6098c2ecf20Sopenharmony_ci	return IRQ_HANDLED;
6108c2ecf20Sopenharmony_ci}
6118c2ecf20Sopenharmony_ci
6128c2ecf20Sopenharmony_ci
6138c2ecf20Sopenharmony_cistatic int gru_user_dropin(struct gru_thread_state *gts,
6148c2ecf20Sopenharmony_ci			   struct gru_tlb_fault_handle *tfh,
6158c2ecf20Sopenharmony_ci			   void *cb)
6168c2ecf20Sopenharmony_ci{
6178c2ecf20Sopenharmony_ci	struct gru_mm_struct *gms = gts->ts_gms;
6188c2ecf20Sopenharmony_ci	int ret;
6198c2ecf20Sopenharmony_ci
6208c2ecf20Sopenharmony_ci	gts->ustats.upm_tlbmiss++;
6218c2ecf20Sopenharmony_ci	while (1) {
6228c2ecf20Sopenharmony_ci		wait_event(gms->ms_wait_queue,
6238c2ecf20Sopenharmony_ci			   atomic_read(&gms->ms_range_active) == 0);
6248c2ecf20Sopenharmony_ci		prefetchw(tfh);	/* Helps on hdw, required for emulator */
6258c2ecf20Sopenharmony_ci		ret = gru_try_dropin(gts->ts_gru, gts, tfh, cb);
6268c2ecf20Sopenharmony_ci		if (ret <= 0)
6278c2ecf20Sopenharmony_ci			return ret;
6288c2ecf20Sopenharmony_ci		STAT(call_os_wait_queue);
6298c2ecf20Sopenharmony_ci	}
6308c2ecf20Sopenharmony_ci}
6318c2ecf20Sopenharmony_ci
6328c2ecf20Sopenharmony_ci/*
6338c2ecf20Sopenharmony_ci * This interface is called as a result of a user detecting a "call OS" bit
6348c2ecf20Sopenharmony_ci * in a user CB. Normally means that a TLB fault has occurred.
6358c2ecf20Sopenharmony_ci * 	cb - user virtual address of the CB
6368c2ecf20Sopenharmony_ci */
6378c2ecf20Sopenharmony_ciint gru_handle_user_call_os(unsigned long cb)
6388c2ecf20Sopenharmony_ci{
6398c2ecf20Sopenharmony_ci	struct gru_tlb_fault_handle *tfh;
6408c2ecf20Sopenharmony_ci	struct gru_thread_state *gts;
6418c2ecf20Sopenharmony_ci	void *cbk;
6428c2ecf20Sopenharmony_ci	int ucbnum, cbrnum, ret = -EINVAL;
6438c2ecf20Sopenharmony_ci
6448c2ecf20Sopenharmony_ci	STAT(call_os);
6458c2ecf20Sopenharmony_ci
6468c2ecf20Sopenharmony_ci	/* sanity check the cb pointer */
6478c2ecf20Sopenharmony_ci	ucbnum = get_cb_number((void *)cb);
6488c2ecf20Sopenharmony_ci	if ((cb & (GRU_HANDLE_STRIDE - 1)) || ucbnum >= GRU_NUM_CB)
6498c2ecf20Sopenharmony_ci		return -EINVAL;
6508c2ecf20Sopenharmony_ci
6518c2ecf20Sopenharmony_ciagain:
6528c2ecf20Sopenharmony_ci	gts = gru_find_lock_gts(cb);
6538c2ecf20Sopenharmony_ci	if (!gts)
6548c2ecf20Sopenharmony_ci		return -EINVAL;
6558c2ecf20Sopenharmony_ci	gru_dbg(grudev, "address 0x%lx, gid %d, gts 0x%p\n", cb, gts->ts_gru ? gts->ts_gru->gs_gid : -1, gts);
6568c2ecf20Sopenharmony_ci
6578c2ecf20Sopenharmony_ci	if (ucbnum >= gts->ts_cbr_au_count * GRU_CBR_AU_SIZE)
6588c2ecf20Sopenharmony_ci		goto exit;
6598c2ecf20Sopenharmony_ci
6608c2ecf20Sopenharmony_ci	if (gru_check_context_placement(gts)) {
6618c2ecf20Sopenharmony_ci		gru_unlock_gts(gts);
6628c2ecf20Sopenharmony_ci		gru_unload_context(gts, 1);
6638c2ecf20Sopenharmony_ci		goto again;
6648c2ecf20Sopenharmony_ci	}
6658c2ecf20Sopenharmony_ci
6668c2ecf20Sopenharmony_ci	/*
6678c2ecf20Sopenharmony_ci	 * CCH may contain stale data if ts_force_cch_reload is set.
6688c2ecf20Sopenharmony_ci	 */
6698c2ecf20Sopenharmony_ci	if (gts->ts_gru && gts->ts_force_cch_reload) {
6708c2ecf20Sopenharmony_ci		gts->ts_force_cch_reload = 0;
6718c2ecf20Sopenharmony_ci		gru_update_cch(gts);
6728c2ecf20Sopenharmony_ci	}
6738c2ecf20Sopenharmony_ci
6748c2ecf20Sopenharmony_ci	ret = -EAGAIN;
6758c2ecf20Sopenharmony_ci	cbrnum = thread_cbr_number(gts, ucbnum);
6768c2ecf20Sopenharmony_ci	if (gts->ts_gru) {
6778c2ecf20Sopenharmony_ci		tfh = get_tfh_by_index(gts->ts_gru, cbrnum);
6788c2ecf20Sopenharmony_ci		cbk = get_gseg_base_address_cb(gts->ts_gru->gs_gru_base_vaddr,
6798c2ecf20Sopenharmony_ci				gts->ts_ctxnum, ucbnum);
6808c2ecf20Sopenharmony_ci		ret = gru_user_dropin(gts, tfh, cbk);
6818c2ecf20Sopenharmony_ci	}
6828c2ecf20Sopenharmony_ciexit:
6838c2ecf20Sopenharmony_ci	gru_unlock_gts(gts);
6848c2ecf20Sopenharmony_ci	return ret;
6858c2ecf20Sopenharmony_ci}
6868c2ecf20Sopenharmony_ci
6878c2ecf20Sopenharmony_ci/*
6888c2ecf20Sopenharmony_ci * Fetch the exception detail information for a CB that terminated with
6898c2ecf20Sopenharmony_ci * an exception.
6908c2ecf20Sopenharmony_ci */
6918c2ecf20Sopenharmony_ciint gru_get_exception_detail(unsigned long arg)
6928c2ecf20Sopenharmony_ci{
6938c2ecf20Sopenharmony_ci	struct control_block_extended_exc_detail excdet;
6948c2ecf20Sopenharmony_ci	struct gru_control_block_extended *cbe;
6958c2ecf20Sopenharmony_ci	struct gru_thread_state *gts;
6968c2ecf20Sopenharmony_ci	int ucbnum, cbrnum, ret;
6978c2ecf20Sopenharmony_ci
6988c2ecf20Sopenharmony_ci	STAT(user_exception);
6998c2ecf20Sopenharmony_ci	if (copy_from_user(&excdet, (void __user *)arg, sizeof(excdet)))
7008c2ecf20Sopenharmony_ci		return -EFAULT;
7018c2ecf20Sopenharmony_ci
7028c2ecf20Sopenharmony_ci	gts = gru_find_lock_gts(excdet.cb);
7038c2ecf20Sopenharmony_ci	if (!gts)
7048c2ecf20Sopenharmony_ci		return -EINVAL;
7058c2ecf20Sopenharmony_ci
7068c2ecf20Sopenharmony_ci	gru_dbg(grudev, "address 0x%lx, gid %d, gts 0x%p\n", excdet.cb, gts->ts_gru ? gts->ts_gru->gs_gid : -1, gts);
7078c2ecf20Sopenharmony_ci	ucbnum = get_cb_number((void *)excdet.cb);
7088c2ecf20Sopenharmony_ci	if (ucbnum >= gts->ts_cbr_au_count * GRU_CBR_AU_SIZE) {
7098c2ecf20Sopenharmony_ci		ret = -EINVAL;
7108c2ecf20Sopenharmony_ci	} else if (gts->ts_gru) {
7118c2ecf20Sopenharmony_ci		cbrnum = thread_cbr_number(gts, ucbnum);
7128c2ecf20Sopenharmony_ci		cbe = get_cbe_by_index(gts->ts_gru, cbrnum);
7138c2ecf20Sopenharmony_ci		gru_flush_cache(cbe);	/* CBE not coherent */
7148c2ecf20Sopenharmony_ci		sync_core();		/* make sure we are have current data */
7158c2ecf20Sopenharmony_ci		excdet.opc = cbe->opccpy;
7168c2ecf20Sopenharmony_ci		excdet.exopc = cbe->exopccpy;
7178c2ecf20Sopenharmony_ci		excdet.ecause = cbe->ecause;
7188c2ecf20Sopenharmony_ci		excdet.exceptdet0 = cbe->idef1upd;
7198c2ecf20Sopenharmony_ci		excdet.exceptdet1 = cbe->idef3upd;
7208c2ecf20Sopenharmony_ci		excdet.cbrstate = cbe->cbrstate;
7218c2ecf20Sopenharmony_ci		excdet.cbrexecstatus = cbe->cbrexecstatus;
7228c2ecf20Sopenharmony_ci		gru_flush_cache_cbe(cbe);
7238c2ecf20Sopenharmony_ci		ret = 0;
7248c2ecf20Sopenharmony_ci	} else {
7258c2ecf20Sopenharmony_ci		ret = -EAGAIN;
7268c2ecf20Sopenharmony_ci	}
7278c2ecf20Sopenharmony_ci	gru_unlock_gts(gts);
7288c2ecf20Sopenharmony_ci
7298c2ecf20Sopenharmony_ci	gru_dbg(grudev,
7308c2ecf20Sopenharmony_ci		"cb 0x%lx, op %d, exopc %d, cbrstate %d, cbrexecstatus 0x%x, ecause 0x%x, "
7318c2ecf20Sopenharmony_ci		"exdet0 0x%lx, exdet1 0x%x\n",
7328c2ecf20Sopenharmony_ci		excdet.cb, excdet.opc, excdet.exopc, excdet.cbrstate, excdet.cbrexecstatus,
7338c2ecf20Sopenharmony_ci		excdet.ecause, excdet.exceptdet0, excdet.exceptdet1);
7348c2ecf20Sopenharmony_ci	if (!ret && copy_to_user((void __user *)arg, &excdet, sizeof(excdet)))
7358c2ecf20Sopenharmony_ci		ret = -EFAULT;
7368c2ecf20Sopenharmony_ci	return ret;
7378c2ecf20Sopenharmony_ci}
7388c2ecf20Sopenharmony_ci
7398c2ecf20Sopenharmony_ci/*
7408c2ecf20Sopenharmony_ci * User request to unload a context. Content is saved for possible reload.
7418c2ecf20Sopenharmony_ci */
7428c2ecf20Sopenharmony_cistatic int gru_unload_all_contexts(void)
7438c2ecf20Sopenharmony_ci{
7448c2ecf20Sopenharmony_ci	struct gru_thread_state *gts;
7458c2ecf20Sopenharmony_ci	struct gru_state *gru;
7468c2ecf20Sopenharmony_ci	int gid, ctxnum;
7478c2ecf20Sopenharmony_ci
7488c2ecf20Sopenharmony_ci	if (!capable(CAP_SYS_ADMIN))
7498c2ecf20Sopenharmony_ci		return -EPERM;
7508c2ecf20Sopenharmony_ci	foreach_gid(gid) {
7518c2ecf20Sopenharmony_ci		gru = GID_TO_GRU(gid);
7528c2ecf20Sopenharmony_ci		spin_lock(&gru->gs_lock);
7538c2ecf20Sopenharmony_ci		for (ctxnum = 0; ctxnum < GRU_NUM_CCH; ctxnum++) {
7548c2ecf20Sopenharmony_ci			gts = gru->gs_gts[ctxnum];
7558c2ecf20Sopenharmony_ci			if (gts && mutex_trylock(&gts->ts_ctxlock)) {
7568c2ecf20Sopenharmony_ci				spin_unlock(&gru->gs_lock);
7578c2ecf20Sopenharmony_ci				gru_unload_context(gts, 1);
7588c2ecf20Sopenharmony_ci				mutex_unlock(&gts->ts_ctxlock);
7598c2ecf20Sopenharmony_ci				spin_lock(&gru->gs_lock);
7608c2ecf20Sopenharmony_ci			}
7618c2ecf20Sopenharmony_ci		}
7628c2ecf20Sopenharmony_ci		spin_unlock(&gru->gs_lock);
7638c2ecf20Sopenharmony_ci	}
7648c2ecf20Sopenharmony_ci	return 0;
7658c2ecf20Sopenharmony_ci}
7668c2ecf20Sopenharmony_ci
7678c2ecf20Sopenharmony_ciint gru_user_unload_context(unsigned long arg)
7688c2ecf20Sopenharmony_ci{
7698c2ecf20Sopenharmony_ci	struct gru_thread_state *gts;
7708c2ecf20Sopenharmony_ci	struct gru_unload_context_req req;
7718c2ecf20Sopenharmony_ci
7728c2ecf20Sopenharmony_ci	STAT(user_unload_context);
7738c2ecf20Sopenharmony_ci	if (copy_from_user(&req, (void __user *)arg, sizeof(req)))
7748c2ecf20Sopenharmony_ci		return -EFAULT;
7758c2ecf20Sopenharmony_ci
7768c2ecf20Sopenharmony_ci	gru_dbg(grudev, "gseg 0x%lx\n", req.gseg);
7778c2ecf20Sopenharmony_ci
7788c2ecf20Sopenharmony_ci	if (!req.gseg)
7798c2ecf20Sopenharmony_ci		return gru_unload_all_contexts();
7808c2ecf20Sopenharmony_ci
7818c2ecf20Sopenharmony_ci	gts = gru_find_lock_gts(req.gseg);
7828c2ecf20Sopenharmony_ci	if (!gts)
7838c2ecf20Sopenharmony_ci		return -EINVAL;
7848c2ecf20Sopenharmony_ci
7858c2ecf20Sopenharmony_ci	if (gts->ts_gru)
7868c2ecf20Sopenharmony_ci		gru_unload_context(gts, 1);
7878c2ecf20Sopenharmony_ci	gru_unlock_gts(gts);
7888c2ecf20Sopenharmony_ci
7898c2ecf20Sopenharmony_ci	return 0;
7908c2ecf20Sopenharmony_ci}
7918c2ecf20Sopenharmony_ci
7928c2ecf20Sopenharmony_ci/*
7938c2ecf20Sopenharmony_ci * User request to flush a range of virtual addresses from the GRU TLB
7948c2ecf20Sopenharmony_ci * (Mainly for testing).
7958c2ecf20Sopenharmony_ci */
7968c2ecf20Sopenharmony_ciint gru_user_flush_tlb(unsigned long arg)
7978c2ecf20Sopenharmony_ci{
7988c2ecf20Sopenharmony_ci	struct gru_thread_state *gts;
7998c2ecf20Sopenharmony_ci	struct gru_flush_tlb_req req;
8008c2ecf20Sopenharmony_ci	struct gru_mm_struct *gms;
8018c2ecf20Sopenharmony_ci
8028c2ecf20Sopenharmony_ci	STAT(user_flush_tlb);
8038c2ecf20Sopenharmony_ci	if (copy_from_user(&req, (void __user *)arg, sizeof(req)))
8048c2ecf20Sopenharmony_ci		return -EFAULT;
8058c2ecf20Sopenharmony_ci
8068c2ecf20Sopenharmony_ci	gru_dbg(grudev, "gseg 0x%lx, vaddr 0x%lx, len 0x%lx\n", req.gseg,
8078c2ecf20Sopenharmony_ci		req.vaddr, req.len);
8088c2ecf20Sopenharmony_ci
8098c2ecf20Sopenharmony_ci	gts = gru_find_lock_gts(req.gseg);
8108c2ecf20Sopenharmony_ci	if (!gts)
8118c2ecf20Sopenharmony_ci		return -EINVAL;
8128c2ecf20Sopenharmony_ci
8138c2ecf20Sopenharmony_ci	gms = gts->ts_gms;
8148c2ecf20Sopenharmony_ci	gru_unlock_gts(gts);
8158c2ecf20Sopenharmony_ci	gru_flush_tlb_range(gms, req.vaddr, req.len);
8168c2ecf20Sopenharmony_ci
8178c2ecf20Sopenharmony_ci	return 0;
8188c2ecf20Sopenharmony_ci}
8198c2ecf20Sopenharmony_ci
8208c2ecf20Sopenharmony_ci/*
8218c2ecf20Sopenharmony_ci * Fetch GSEG statisticss
8228c2ecf20Sopenharmony_ci */
8238c2ecf20Sopenharmony_cilong gru_get_gseg_statistics(unsigned long arg)
8248c2ecf20Sopenharmony_ci{
8258c2ecf20Sopenharmony_ci	struct gru_thread_state *gts;
8268c2ecf20Sopenharmony_ci	struct gru_get_gseg_statistics_req req;
8278c2ecf20Sopenharmony_ci
8288c2ecf20Sopenharmony_ci	if (copy_from_user(&req, (void __user *)arg, sizeof(req)))
8298c2ecf20Sopenharmony_ci		return -EFAULT;
8308c2ecf20Sopenharmony_ci
8318c2ecf20Sopenharmony_ci	/*
8328c2ecf20Sopenharmony_ci	 * The library creates arrays of contexts for threaded programs.
8338c2ecf20Sopenharmony_ci	 * If no gts exists in the array, the context has never been used & all
8348c2ecf20Sopenharmony_ci	 * statistics are implicitly 0.
8358c2ecf20Sopenharmony_ci	 */
8368c2ecf20Sopenharmony_ci	gts = gru_find_lock_gts(req.gseg);
8378c2ecf20Sopenharmony_ci	if (gts) {
8388c2ecf20Sopenharmony_ci		memcpy(&req.stats, &gts->ustats, sizeof(gts->ustats));
8398c2ecf20Sopenharmony_ci		gru_unlock_gts(gts);
8408c2ecf20Sopenharmony_ci	} else {
8418c2ecf20Sopenharmony_ci		memset(&req.stats, 0, sizeof(gts->ustats));
8428c2ecf20Sopenharmony_ci	}
8438c2ecf20Sopenharmony_ci
8448c2ecf20Sopenharmony_ci	if (copy_to_user((void __user *)arg, &req, sizeof(req)))
8458c2ecf20Sopenharmony_ci		return -EFAULT;
8468c2ecf20Sopenharmony_ci
8478c2ecf20Sopenharmony_ci	return 0;
8488c2ecf20Sopenharmony_ci}
8498c2ecf20Sopenharmony_ci
8508c2ecf20Sopenharmony_ci/*
8518c2ecf20Sopenharmony_ci * Register the current task as the user of the GSEG slice.
8528c2ecf20Sopenharmony_ci * Needed for TLB fault interrupt targeting.
8538c2ecf20Sopenharmony_ci */
8548c2ecf20Sopenharmony_ciint gru_set_context_option(unsigned long arg)
8558c2ecf20Sopenharmony_ci{
8568c2ecf20Sopenharmony_ci	struct gru_thread_state *gts;
8578c2ecf20Sopenharmony_ci	struct gru_set_context_option_req req;
8588c2ecf20Sopenharmony_ci	int ret = 0;
8598c2ecf20Sopenharmony_ci
8608c2ecf20Sopenharmony_ci	STAT(set_context_option);
8618c2ecf20Sopenharmony_ci	if (copy_from_user(&req, (void __user *)arg, sizeof(req)))
8628c2ecf20Sopenharmony_ci		return -EFAULT;
8638c2ecf20Sopenharmony_ci	gru_dbg(grudev, "op %d, gseg 0x%lx, value1 0x%lx\n", req.op, req.gseg, req.val1);
8648c2ecf20Sopenharmony_ci
8658c2ecf20Sopenharmony_ci	gts = gru_find_lock_gts(req.gseg);
8668c2ecf20Sopenharmony_ci	if (!gts) {
8678c2ecf20Sopenharmony_ci		gts = gru_alloc_locked_gts(req.gseg);
8688c2ecf20Sopenharmony_ci		if (IS_ERR(gts))
8698c2ecf20Sopenharmony_ci			return PTR_ERR(gts);
8708c2ecf20Sopenharmony_ci	}
8718c2ecf20Sopenharmony_ci
8728c2ecf20Sopenharmony_ci	switch (req.op) {
8738c2ecf20Sopenharmony_ci	case sco_blade_chiplet:
8748c2ecf20Sopenharmony_ci		/* Select blade/chiplet for GRU context */
8758c2ecf20Sopenharmony_ci		if (req.val0 < -1 || req.val0 >= GRU_CHIPLETS_PER_HUB ||
8768c2ecf20Sopenharmony_ci		    req.val1 < -1 || req.val1 >= GRU_MAX_BLADES ||
8778c2ecf20Sopenharmony_ci		    (req.val1 >= 0 && !gru_base[req.val1])) {
8788c2ecf20Sopenharmony_ci			ret = -EINVAL;
8798c2ecf20Sopenharmony_ci		} else {
8808c2ecf20Sopenharmony_ci			gts->ts_user_blade_id = req.val1;
8818c2ecf20Sopenharmony_ci			gts->ts_user_chiplet_id = req.val0;
8828c2ecf20Sopenharmony_ci			if (gru_check_context_placement(gts)) {
8838c2ecf20Sopenharmony_ci				gru_unlock_gts(gts);
8848c2ecf20Sopenharmony_ci				gru_unload_context(gts, 1);
8858c2ecf20Sopenharmony_ci				return ret;
8868c2ecf20Sopenharmony_ci			}
8878c2ecf20Sopenharmony_ci		}
8888c2ecf20Sopenharmony_ci		break;
8898c2ecf20Sopenharmony_ci	case sco_gseg_owner:
8908c2ecf20Sopenharmony_ci 		/* Register the current task as the GSEG owner */
8918c2ecf20Sopenharmony_ci		gts->ts_tgid_owner = current->tgid;
8928c2ecf20Sopenharmony_ci		break;
8938c2ecf20Sopenharmony_ci	case sco_cch_req_slice:
8948c2ecf20Sopenharmony_ci 		/* Set the CCH slice option */
8958c2ecf20Sopenharmony_ci		gts->ts_cch_req_slice = req.val1 & 3;
8968c2ecf20Sopenharmony_ci		break;
8978c2ecf20Sopenharmony_ci	default:
8988c2ecf20Sopenharmony_ci		ret = -EINVAL;
8998c2ecf20Sopenharmony_ci	}
9008c2ecf20Sopenharmony_ci	gru_unlock_gts(gts);
9018c2ecf20Sopenharmony_ci
9028c2ecf20Sopenharmony_ci	return ret;
9038c2ecf20Sopenharmony_ci}
904