18c2ecf20Sopenharmony_ci// SPDX-License-Identifier: GPL-2.0-or-later
28c2ecf20Sopenharmony_ci/*
38c2ecf20Sopenharmony_ci * SN Platform GRU Driver
48c2ecf20Sopenharmony_ci *
58c2ecf20Sopenharmony_ci *            DRIVER TABLE MANAGER + GRU CONTEXT LOAD/UNLOAD
68c2ecf20Sopenharmony_ci *
78c2ecf20Sopenharmony_ci *  Copyright (c) 2008 Silicon Graphics, Inc.  All Rights Reserved.
88c2ecf20Sopenharmony_ci */
98c2ecf20Sopenharmony_ci
108c2ecf20Sopenharmony_ci#include <linux/kernel.h>
118c2ecf20Sopenharmony_ci#include <linux/slab.h>
128c2ecf20Sopenharmony_ci#include <linux/mm.h>
138c2ecf20Sopenharmony_ci#include <linux/spinlock.h>
148c2ecf20Sopenharmony_ci#include <linux/sched.h>
158c2ecf20Sopenharmony_ci#include <linux/device.h>
168c2ecf20Sopenharmony_ci#include <linux/list.h>
178c2ecf20Sopenharmony_ci#include <linux/err.h>
188c2ecf20Sopenharmony_ci#include <linux/prefetch.h>
198c2ecf20Sopenharmony_ci#include <asm/uv/uv_hub.h>
208c2ecf20Sopenharmony_ci#include "gru.h"
218c2ecf20Sopenharmony_ci#include "grutables.h"
228c2ecf20Sopenharmony_ci#include "gruhandles.h"
238c2ecf20Sopenharmony_ci
248c2ecf20Sopenharmony_ciunsigned long gru_options __read_mostly;
258c2ecf20Sopenharmony_ci
268c2ecf20Sopenharmony_cistatic struct device_driver gru_driver = {
278c2ecf20Sopenharmony_ci	.name = "gru"
288c2ecf20Sopenharmony_ci};
298c2ecf20Sopenharmony_ci
308c2ecf20Sopenharmony_cistatic struct device gru_device = {
318c2ecf20Sopenharmony_ci	.init_name = "",
328c2ecf20Sopenharmony_ci	.driver = &gru_driver,
338c2ecf20Sopenharmony_ci};
348c2ecf20Sopenharmony_ci
358c2ecf20Sopenharmony_cistruct device *grudev = &gru_device;
368c2ecf20Sopenharmony_ci
378c2ecf20Sopenharmony_ci/*
388c2ecf20Sopenharmony_ci * Select a gru fault map to be used by the current cpu. Note that
398c2ecf20Sopenharmony_ci * multiple cpus may be using the same map.
408c2ecf20Sopenharmony_ci *	ZZZ should be inline but did not work on emulator
418c2ecf20Sopenharmony_ci */
428c2ecf20Sopenharmony_ciint gru_cpu_fault_map_id(void)
438c2ecf20Sopenharmony_ci{
448c2ecf20Sopenharmony_ci#ifdef CONFIG_IA64
458c2ecf20Sopenharmony_ci	return uv_blade_processor_id() % GRU_NUM_TFM;
468c2ecf20Sopenharmony_ci#else
478c2ecf20Sopenharmony_ci	int cpu = smp_processor_id();
488c2ecf20Sopenharmony_ci	int id, core;
498c2ecf20Sopenharmony_ci
508c2ecf20Sopenharmony_ci	core = uv_cpu_core_number(cpu);
518c2ecf20Sopenharmony_ci	id = core + UV_MAX_INT_CORES * uv_cpu_socket_number(cpu);
528c2ecf20Sopenharmony_ci	return id;
538c2ecf20Sopenharmony_ci#endif
548c2ecf20Sopenharmony_ci}
558c2ecf20Sopenharmony_ci
568c2ecf20Sopenharmony_ci/*--------- ASID Management -------------------------------------------
578c2ecf20Sopenharmony_ci *
588c2ecf20Sopenharmony_ci *  Initially, assign asids sequentially from MIN_ASID .. MAX_ASID.
598c2ecf20Sopenharmony_ci *  Once MAX is reached, flush the TLB & start over. However,
608c2ecf20Sopenharmony_ci *  some asids may still be in use. There won't be many (percentage wise) still
618c2ecf20Sopenharmony_ci *  in use. Search active contexts & determine the value of the first
628c2ecf20Sopenharmony_ci *  asid in use ("x"s below). Set "limit" to this value.
638c2ecf20Sopenharmony_ci *  This defines a block of assignable asids.
648c2ecf20Sopenharmony_ci *
658c2ecf20Sopenharmony_ci *  When "limit" is reached, search forward from limit+1 and determine the
668c2ecf20Sopenharmony_ci *  next block of assignable asids.
678c2ecf20Sopenharmony_ci *
688c2ecf20Sopenharmony_ci *  Repeat until MAX_ASID is reached, then start over again.
698c2ecf20Sopenharmony_ci *
708c2ecf20Sopenharmony_ci *  Each time MAX_ASID is reached, increment the asid generation. Since
718c2ecf20Sopenharmony_ci *  the search for in-use asids only checks contexts with GRUs currently
728c2ecf20Sopenharmony_ci *  assigned, asids in some contexts will be missed. Prior to loading
738c2ecf20Sopenharmony_ci *  a context, the asid generation of the GTS asid is rechecked. If it
748c2ecf20Sopenharmony_ci *  doesn't match the current generation, a new asid will be assigned.
758c2ecf20Sopenharmony_ci *
768c2ecf20Sopenharmony_ci *   	0---------------x------------x---------------------x----|
778c2ecf20Sopenharmony_ci *	  ^-next	^-limit	   				^-MAX_ASID
788c2ecf20Sopenharmony_ci *
798c2ecf20Sopenharmony_ci * All asid manipulation & context loading/unloading is protected by the
808c2ecf20Sopenharmony_ci * gs_lock.
818c2ecf20Sopenharmony_ci */
828c2ecf20Sopenharmony_ci
838c2ecf20Sopenharmony_ci/* Hit the asid limit. Start over */
848c2ecf20Sopenharmony_cistatic int gru_wrap_asid(struct gru_state *gru)
858c2ecf20Sopenharmony_ci{
868c2ecf20Sopenharmony_ci	gru_dbg(grudev, "gid %d\n", gru->gs_gid);
878c2ecf20Sopenharmony_ci	STAT(asid_wrap);
888c2ecf20Sopenharmony_ci	gru->gs_asid_gen++;
898c2ecf20Sopenharmony_ci	return MIN_ASID;
908c2ecf20Sopenharmony_ci}
918c2ecf20Sopenharmony_ci
928c2ecf20Sopenharmony_ci/* Find the next chunk of unused asids */
938c2ecf20Sopenharmony_cistatic int gru_reset_asid_limit(struct gru_state *gru, int asid)
948c2ecf20Sopenharmony_ci{
958c2ecf20Sopenharmony_ci	int i, gid, inuse_asid, limit;
968c2ecf20Sopenharmony_ci
978c2ecf20Sopenharmony_ci	gru_dbg(grudev, "gid %d, asid 0x%x\n", gru->gs_gid, asid);
988c2ecf20Sopenharmony_ci	STAT(asid_next);
998c2ecf20Sopenharmony_ci	limit = MAX_ASID;
1008c2ecf20Sopenharmony_ci	if (asid >= limit)
1018c2ecf20Sopenharmony_ci		asid = gru_wrap_asid(gru);
1028c2ecf20Sopenharmony_ci	gru_flush_all_tlb(gru);
1038c2ecf20Sopenharmony_ci	gid = gru->gs_gid;
1048c2ecf20Sopenharmony_ciagain:
1058c2ecf20Sopenharmony_ci	for (i = 0; i < GRU_NUM_CCH; i++) {
1068c2ecf20Sopenharmony_ci		if (!gru->gs_gts[i] || is_kernel_context(gru->gs_gts[i]))
1078c2ecf20Sopenharmony_ci			continue;
1088c2ecf20Sopenharmony_ci		inuse_asid = gru->gs_gts[i]->ts_gms->ms_asids[gid].mt_asid;
1098c2ecf20Sopenharmony_ci		gru_dbg(grudev, "gid %d, gts %p, gms %p, inuse 0x%x, cxt %d\n",
1108c2ecf20Sopenharmony_ci			gru->gs_gid, gru->gs_gts[i], gru->gs_gts[i]->ts_gms,
1118c2ecf20Sopenharmony_ci			inuse_asid, i);
1128c2ecf20Sopenharmony_ci		if (inuse_asid == asid) {
1138c2ecf20Sopenharmony_ci			asid += ASID_INC;
1148c2ecf20Sopenharmony_ci			if (asid >= limit) {
1158c2ecf20Sopenharmony_ci				/*
1168c2ecf20Sopenharmony_ci				 * empty range: reset the range limit and
1178c2ecf20Sopenharmony_ci				 * start over
1188c2ecf20Sopenharmony_ci				 */
1198c2ecf20Sopenharmony_ci				limit = MAX_ASID;
1208c2ecf20Sopenharmony_ci				if (asid >= MAX_ASID)
1218c2ecf20Sopenharmony_ci					asid = gru_wrap_asid(gru);
1228c2ecf20Sopenharmony_ci				goto again;
1238c2ecf20Sopenharmony_ci			}
1248c2ecf20Sopenharmony_ci		}
1258c2ecf20Sopenharmony_ci
1268c2ecf20Sopenharmony_ci		if ((inuse_asid > asid) && (inuse_asid < limit))
1278c2ecf20Sopenharmony_ci			limit = inuse_asid;
1288c2ecf20Sopenharmony_ci	}
1298c2ecf20Sopenharmony_ci	gru->gs_asid_limit = limit;
1308c2ecf20Sopenharmony_ci	gru->gs_asid = asid;
1318c2ecf20Sopenharmony_ci	gru_dbg(grudev, "gid %d, new asid 0x%x, new_limit 0x%x\n", gru->gs_gid,
1328c2ecf20Sopenharmony_ci					asid, limit);
1338c2ecf20Sopenharmony_ci	return asid;
1348c2ecf20Sopenharmony_ci}
1358c2ecf20Sopenharmony_ci
1368c2ecf20Sopenharmony_ci/* Assign a new ASID to a thread context.  */
1378c2ecf20Sopenharmony_cistatic int gru_assign_asid(struct gru_state *gru)
1388c2ecf20Sopenharmony_ci{
1398c2ecf20Sopenharmony_ci	int asid;
1408c2ecf20Sopenharmony_ci
1418c2ecf20Sopenharmony_ci	gru->gs_asid += ASID_INC;
1428c2ecf20Sopenharmony_ci	asid = gru->gs_asid;
1438c2ecf20Sopenharmony_ci	if (asid >= gru->gs_asid_limit)
1448c2ecf20Sopenharmony_ci		asid = gru_reset_asid_limit(gru, asid);
1458c2ecf20Sopenharmony_ci
1468c2ecf20Sopenharmony_ci	gru_dbg(grudev, "gid %d, asid 0x%x\n", gru->gs_gid, asid);
1478c2ecf20Sopenharmony_ci	return asid;
1488c2ecf20Sopenharmony_ci}
1498c2ecf20Sopenharmony_ci
/*
 * Take @n set bits out of the word at @p, lowest bit first, looking only
 * at the first @mmax bit positions.
 *
 * Each taken bit is cleared in *@p and set in the returned word. When
 * @idx is non-NULL, the bit numbers are also stored, in order, as
 * successive chars starting at @idx.
 *
 * Hits BUG() if fewer than @n bits are available.
 */
static unsigned long reserve_resources(unsigned long *p, int n, int mmax,
				       char *idx)
{
	unsigned long taken = 0;
	int bit;

	for (; n != 0; n--) {
		bit = find_first_bit(p, mmax);
		if (bit == mmax)
			BUG();

		__clear_bit(bit, p);
		__set_bit(bit, &taken);

		if (idx) {
			*idx = bit;
			idx++;
		}
	}

	return taken;
}
1718c2ecf20Sopenharmony_ci
1728c2ecf20Sopenharmony_ciunsigned long gru_reserve_cb_resources(struct gru_state *gru, int cbr_au_count,
1738c2ecf20Sopenharmony_ci				       char *cbmap)
1748c2ecf20Sopenharmony_ci{
1758c2ecf20Sopenharmony_ci	return reserve_resources(&gru->gs_cbr_map, cbr_au_count, GRU_CBR_AU,
1768c2ecf20Sopenharmony_ci				 cbmap);
1778c2ecf20Sopenharmony_ci}
1788c2ecf20Sopenharmony_ci
1798c2ecf20Sopenharmony_ciunsigned long gru_reserve_ds_resources(struct gru_state *gru, int dsr_au_count,
1808c2ecf20Sopenharmony_ci				       char *dsmap)
1818c2ecf20Sopenharmony_ci{
1828c2ecf20Sopenharmony_ci	return reserve_resources(&gru->gs_dsr_map, dsr_au_count, GRU_DSR_AU,
1838c2ecf20Sopenharmony_ci				 dsmap);
1848c2ecf20Sopenharmony_ci}
1858c2ecf20Sopenharmony_ci
1868c2ecf20Sopenharmony_cistatic void reserve_gru_resources(struct gru_state *gru,
1878c2ecf20Sopenharmony_ci				  struct gru_thread_state *gts)
1888c2ecf20Sopenharmony_ci{
1898c2ecf20Sopenharmony_ci	gru->gs_active_contexts++;
1908c2ecf20Sopenharmony_ci	gts->ts_cbr_map =
1918c2ecf20Sopenharmony_ci	    gru_reserve_cb_resources(gru, gts->ts_cbr_au_count,
1928c2ecf20Sopenharmony_ci				     gts->ts_cbr_idx);
1938c2ecf20Sopenharmony_ci	gts->ts_dsr_map =
1948c2ecf20Sopenharmony_ci	    gru_reserve_ds_resources(gru, gts->ts_dsr_au_count, NULL);
1958c2ecf20Sopenharmony_ci}
1968c2ecf20Sopenharmony_ci
1978c2ecf20Sopenharmony_cistatic void free_gru_resources(struct gru_state *gru,
1988c2ecf20Sopenharmony_ci			       struct gru_thread_state *gts)
1998c2ecf20Sopenharmony_ci{
2008c2ecf20Sopenharmony_ci	gru->gs_active_contexts--;
2018c2ecf20Sopenharmony_ci	gru->gs_cbr_map |= gts->ts_cbr_map;
2028c2ecf20Sopenharmony_ci	gru->gs_dsr_map |= gts->ts_dsr_map;
2038c2ecf20Sopenharmony_ci}
2048c2ecf20Sopenharmony_ci
2058c2ecf20Sopenharmony_ci/*
2068c2ecf20Sopenharmony_ci * Check if a GRU has sufficient free resources to satisfy an allocation
2078c2ecf20Sopenharmony_ci * request. Note: GRU locks may or may not be held when this is called. If
2088c2ecf20Sopenharmony_ci * not held, recheck after acquiring the appropriate locks.
2098c2ecf20Sopenharmony_ci *
2108c2ecf20Sopenharmony_ci * Returns 1 if sufficient resources, 0 if not
2118c2ecf20Sopenharmony_ci */
2128c2ecf20Sopenharmony_cistatic int check_gru_resources(struct gru_state *gru, int cbr_au_count,
2138c2ecf20Sopenharmony_ci			       int dsr_au_count, int max_active_contexts)
2148c2ecf20Sopenharmony_ci{
2158c2ecf20Sopenharmony_ci	return hweight64(gru->gs_cbr_map) >= cbr_au_count
2168c2ecf20Sopenharmony_ci		&& hweight64(gru->gs_dsr_map) >= dsr_au_count
2178c2ecf20Sopenharmony_ci		&& gru->gs_active_contexts < max_active_contexts;
2188c2ecf20Sopenharmony_ci}
2198c2ecf20Sopenharmony_ci
2208c2ecf20Sopenharmony_ci/*
2218c2ecf20Sopenharmony_ci * TLB manangment requires tracking all GRU chiplets that have loaded a GSEG
2228c2ecf20Sopenharmony_ci * context.
2238c2ecf20Sopenharmony_ci */
2248c2ecf20Sopenharmony_cistatic int gru_load_mm_tracker(struct gru_state *gru,
2258c2ecf20Sopenharmony_ci					struct gru_thread_state *gts)
2268c2ecf20Sopenharmony_ci{
2278c2ecf20Sopenharmony_ci	struct gru_mm_struct *gms = gts->ts_gms;
2288c2ecf20Sopenharmony_ci	struct gru_mm_tracker *asids = &gms->ms_asids[gru->gs_gid];
2298c2ecf20Sopenharmony_ci	unsigned short ctxbitmap = (1 << gts->ts_ctxnum);
2308c2ecf20Sopenharmony_ci	int asid;
2318c2ecf20Sopenharmony_ci
2328c2ecf20Sopenharmony_ci	spin_lock(&gms->ms_asid_lock);
2338c2ecf20Sopenharmony_ci	asid = asids->mt_asid;
2348c2ecf20Sopenharmony_ci
2358c2ecf20Sopenharmony_ci	spin_lock(&gru->gs_asid_lock);
2368c2ecf20Sopenharmony_ci	if (asid == 0 || (asids->mt_ctxbitmap == 0 && asids->mt_asid_gen !=
2378c2ecf20Sopenharmony_ci			  gru->gs_asid_gen)) {
2388c2ecf20Sopenharmony_ci		asid = gru_assign_asid(gru);
2398c2ecf20Sopenharmony_ci		asids->mt_asid = asid;
2408c2ecf20Sopenharmony_ci		asids->mt_asid_gen = gru->gs_asid_gen;
2418c2ecf20Sopenharmony_ci		STAT(asid_new);
2428c2ecf20Sopenharmony_ci	} else {
2438c2ecf20Sopenharmony_ci		STAT(asid_reuse);
2448c2ecf20Sopenharmony_ci	}
2458c2ecf20Sopenharmony_ci	spin_unlock(&gru->gs_asid_lock);
2468c2ecf20Sopenharmony_ci
2478c2ecf20Sopenharmony_ci	BUG_ON(asids->mt_ctxbitmap & ctxbitmap);
2488c2ecf20Sopenharmony_ci	asids->mt_ctxbitmap |= ctxbitmap;
2498c2ecf20Sopenharmony_ci	if (!test_bit(gru->gs_gid, gms->ms_asidmap))
2508c2ecf20Sopenharmony_ci		__set_bit(gru->gs_gid, gms->ms_asidmap);
2518c2ecf20Sopenharmony_ci	spin_unlock(&gms->ms_asid_lock);
2528c2ecf20Sopenharmony_ci
2538c2ecf20Sopenharmony_ci	gru_dbg(grudev,
2548c2ecf20Sopenharmony_ci		"gid %d, gts %p, gms %p, ctxnum %d, asid 0x%x, asidmap 0x%lx\n",
2558c2ecf20Sopenharmony_ci		gru->gs_gid, gts, gms, gts->ts_ctxnum, asid,
2568c2ecf20Sopenharmony_ci		gms->ms_asidmap[0]);
2578c2ecf20Sopenharmony_ci	return asid;
2588c2ecf20Sopenharmony_ci}
2598c2ecf20Sopenharmony_ci
2608c2ecf20Sopenharmony_cistatic void gru_unload_mm_tracker(struct gru_state *gru,
2618c2ecf20Sopenharmony_ci					struct gru_thread_state *gts)
2628c2ecf20Sopenharmony_ci{
2638c2ecf20Sopenharmony_ci	struct gru_mm_struct *gms = gts->ts_gms;
2648c2ecf20Sopenharmony_ci	struct gru_mm_tracker *asids;
2658c2ecf20Sopenharmony_ci	unsigned short ctxbitmap;
2668c2ecf20Sopenharmony_ci
2678c2ecf20Sopenharmony_ci	asids = &gms->ms_asids[gru->gs_gid];
2688c2ecf20Sopenharmony_ci	ctxbitmap = (1 << gts->ts_ctxnum);
2698c2ecf20Sopenharmony_ci	spin_lock(&gms->ms_asid_lock);
2708c2ecf20Sopenharmony_ci	spin_lock(&gru->gs_asid_lock);
2718c2ecf20Sopenharmony_ci	BUG_ON((asids->mt_ctxbitmap & ctxbitmap) != ctxbitmap);
2728c2ecf20Sopenharmony_ci	asids->mt_ctxbitmap ^= ctxbitmap;
2738c2ecf20Sopenharmony_ci	gru_dbg(grudev, "gid %d, gts %p, gms %p, ctxnum %d, asidmap 0x%lx\n",
2748c2ecf20Sopenharmony_ci		gru->gs_gid, gts, gms, gts->ts_ctxnum, gms->ms_asidmap[0]);
2758c2ecf20Sopenharmony_ci	spin_unlock(&gru->gs_asid_lock);
2768c2ecf20Sopenharmony_ci	spin_unlock(&gms->ms_asid_lock);
2778c2ecf20Sopenharmony_ci}
2788c2ecf20Sopenharmony_ci
2798c2ecf20Sopenharmony_ci/*
2808c2ecf20Sopenharmony_ci * Decrement the reference count on a GTS structure. Free the structure
2818c2ecf20Sopenharmony_ci * if the reference count goes to zero.
2828c2ecf20Sopenharmony_ci */
2838c2ecf20Sopenharmony_civoid gts_drop(struct gru_thread_state *gts)
2848c2ecf20Sopenharmony_ci{
2858c2ecf20Sopenharmony_ci	if (gts && atomic_dec_return(&gts->ts_refcnt) == 0) {
2868c2ecf20Sopenharmony_ci		if (gts->ts_gms)
2878c2ecf20Sopenharmony_ci			gru_drop_mmu_notifier(gts->ts_gms);
2888c2ecf20Sopenharmony_ci		kfree(gts);
2898c2ecf20Sopenharmony_ci		STAT(gts_free);
2908c2ecf20Sopenharmony_ci	}
2918c2ecf20Sopenharmony_ci}
2928c2ecf20Sopenharmony_ci
2938c2ecf20Sopenharmony_ci/*
2948c2ecf20Sopenharmony_ci * Locate the GTS structure for the current thread.
2958c2ecf20Sopenharmony_ci */
2968c2ecf20Sopenharmony_cistatic struct gru_thread_state *gru_find_current_gts_nolock(struct gru_vma_data
2978c2ecf20Sopenharmony_ci			    *vdata, int tsid)
2988c2ecf20Sopenharmony_ci{
2998c2ecf20Sopenharmony_ci	struct gru_thread_state *gts;
3008c2ecf20Sopenharmony_ci
3018c2ecf20Sopenharmony_ci	list_for_each_entry(gts, &vdata->vd_head, ts_next)
3028c2ecf20Sopenharmony_ci	    if (gts->ts_tsid == tsid)
3038c2ecf20Sopenharmony_ci		return gts;
3048c2ecf20Sopenharmony_ci	return NULL;
3058c2ecf20Sopenharmony_ci}
3068c2ecf20Sopenharmony_ci
3078c2ecf20Sopenharmony_ci/*
3088c2ecf20Sopenharmony_ci * Allocate a thread state structure.
3098c2ecf20Sopenharmony_ci */
3108c2ecf20Sopenharmony_cistruct gru_thread_state *gru_alloc_gts(struct vm_area_struct *vma,
3118c2ecf20Sopenharmony_ci		int cbr_au_count, int dsr_au_count,
3128c2ecf20Sopenharmony_ci		unsigned char tlb_preload_count, int options, int tsid)
3138c2ecf20Sopenharmony_ci{
3148c2ecf20Sopenharmony_ci	struct gru_thread_state *gts;
3158c2ecf20Sopenharmony_ci	struct gru_mm_struct *gms;
3168c2ecf20Sopenharmony_ci	int bytes;
3178c2ecf20Sopenharmony_ci
3188c2ecf20Sopenharmony_ci	bytes = DSR_BYTES(dsr_au_count) + CBR_BYTES(cbr_au_count);
3198c2ecf20Sopenharmony_ci	bytes += sizeof(struct gru_thread_state);
3208c2ecf20Sopenharmony_ci	gts = kmalloc(bytes, GFP_KERNEL);
3218c2ecf20Sopenharmony_ci	if (!gts)
3228c2ecf20Sopenharmony_ci		return ERR_PTR(-ENOMEM);
3238c2ecf20Sopenharmony_ci
3248c2ecf20Sopenharmony_ci	STAT(gts_alloc);
3258c2ecf20Sopenharmony_ci	memset(gts, 0, sizeof(struct gru_thread_state)); /* zero out header */
3268c2ecf20Sopenharmony_ci	atomic_set(&gts->ts_refcnt, 1);
3278c2ecf20Sopenharmony_ci	mutex_init(&gts->ts_ctxlock);
3288c2ecf20Sopenharmony_ci	gts->ts_cbr_au_count = cbr_au_count;
3298c2ecf20Sopenharmony_ci	gts->ts_dsr_au_count = dsr_au_count;
3308c2ecf20Sopenharmony_ci	gts->ts_tlb_preload_count = tlb_preload_count;
3318c2ecf20Sopenharmony_ci	gts->ts_user_options = options;
3328c2ecf20Sopenharmony_ci	gts->ts_user_blade_id = -1;
3338c2ecf20Sopenharmony_ci	gts->ts_user_chiplet_id = -1;
3348c2ecf20Sopenharmony_ci	gts->ts_tsid = tsid;
3358c2ecf20Sopenharmony_ci	gts->ts_ctxnum = NULLCTX;
3368c2ecf20Sopenharmony_ci	gts->ts_tlb_int_select = -1;
3378c2ecf20Sopenharmony_ci	gts->ts_cch_req_slice = -1;
3388c2ecf20Sopenharmony_ci	gts->ts_sizeavail = GRU_SIZEAVAIL(PAGE_SHIFT);
3398c2ecf20Sopenharmony_ci	if (vma) {
3408c2ecf20Sopenharmony_ci		gts->ts_mm = current->mm;
3418c2ecf20Sopenharmony_ci		gts->ts_vma = vma;
3428c2ecf20Sopenharmony_ci		gms = gru_register_mmu_notifier();
3438c2ecf20Sopenharmony_ci		if (IS_ERR(gms))
3448c2ecf20Sopenharmony_ci			goto err;
3458c2ecf20Sopenharmony_ci		gts->ts_gms = gms;
3468c2ecf20Sopenharmony_ci	}
3478c2ecf20Sopenharmony_ci
3488c2ecf20Sopenharmony_ci	gru_dbg(grudev, "alloc gts %p\n", gts);
3498c2ecf20Sopenharmony_ci	return gts;
3508c2ecf20Sopenharmony_ci
3518c2ecf20Sopenharmony_cierr:
3528c2ecf20Sopenharmony_ci	gts_drop(gts);
3538c2ecf20Sopenharmony_ci	return ERR_CAST(gms);
3548c2ecf20Sopenharmony_ci}
3558c2ecf20Sopenharmony_ci
3568c2ecf20Sopenharmony_ci/*
3578c2ecf20Sopenharmony_ci * Allocate a vma private data structure.
3588c2ecf20Sopenharmony_ci */
3598c2ecf20Sopenharmony_cistruct gru_vma_data *gru_alloc_vma_data(struct vm_area_struct *vma, int tsid)
3608c2ecf20Sopenharmony_ci{
3618c2ecf20Sopenharmony_ci	struct gru_vma_data *vdata = NULL;
3628c2ecf20Sopenharmony_ci
3638c2ecf20Sopenharmony_ci	vdata = kmalloc(sizeof(*vdata), GFP_KERNEL);
3648c2ecf20Sopenharmony_ci	if (!vdata)
3658c2ecf20Sopenharmony_ci		return NULL;
3668c2ecf20Sopenharmony_ci
3678c2ecf20Sopenharmony_ci	STAT(vdata_alloc);
3688c2ecf20Sopenharmony_ci	INIT_LIST_HEAD(&vdata->vd_head);
3698c2ecf20Sopenharmony_ci	spin_lock_init(&vdata->vd_lock);
3708c2ecf20Sopenharmony_ci	gru_dbg(grudev, "alloc vdata %p\n", vdata);
3718c2ecf20Sopenharmony_ci	return vdata;
3728c2ecf20Sopenharmony_ci}
3738c2ecf20Sopenharmony_ci
3748c2ecf20Sopenharmony_ci/*
3758c2ecf20Sopenharmony_ci * Find the thread state structure for the current thread.
3768c2ecf20Sopenharmony_ci */
3778c2ecf20Sopenharmony_cistruct gru_thread_state *gru_find_thread_state(struct vm_area_struct *vma,
3788c2ecf20Sopenharmony_ci					int tsid)
3798c2ecf20Sopenharmony_ci{
3808c2ecf20Sopenharmony_ci	struct gru_vma_data *vdata = vma->vm_private_data;
3818c2ecf20Sopenharmony_ci	struct gru_thread_state *gts;
3828c2ecf20Sopenharmony_ci
3838c2ecf20Sopenharmony_ci	spin_lock(&vdata->vd_lock);
3848c2ecf20Sopenharmony_ci	gts = gru_find_current_gts_nolock(vdata, tsid);
3858c2ecf20Sopenharmony_ci	spin_unlock(&vdata->vd_lock);
3868c2ecf20Sopenharmony_ci	gru_dbg(grudev, "vma %p, gts %p\n", vma, gts);
3878c2ecf20Sopenharmony_ci	return gts;
3888c2ecf20Sopenharmony_ci}
3898c2ecf20Sopenharmony_ci
3908c2ecf20Sopenharmony_ci/*
3918c2ecf20Sopenharmony_ci * Allocate a new thread state for a GSEG. Note that races may allow
3928c2ecf20Sopenharmony_ci * another thread to race to create a gts.
3938c2ecf20Sopenharmony_ci */
3948c2ecf20Sopenharmony_cistruct gru_thread_state *gru_alloc_thread_state(struct vm_area_struct *vma,
3958c2ecf20Sopenharmony_ci					int tsid)
3968c2ecf20Sopenharmony_ci{
3978c2ecf20Sopenharmony_ci	struct gru_vma_data *vdata = vma->vm_private_data;
3988c2ecf20Sopenharmony_ci	struct gru_thread_state *gts, *ngts;
3998c2ecf20Sopenharmony_ci
4008c2ecf20Sopenharmony_ci	gts = gru_alloc_gts(vma, vdata->vd_cbr_au_count,
4018c2ecf20Sopenharmony_ci			    vdata->vd_dsr_au_count,
4028c2ecf20Sopenharmony_ci			    vdata->vd_tlb_preload_count,
4038c2ecf20Sopenharmony_ci			    vdata->vd_user_options, tsid);
4048c2ecf20Sopenharmony_ci	if (IS_ERR(gts))
4058c2ecf20Sopenharmony_ci		return gts;
4068c2ecf20Sopenharmony_ci
4078c2ecf20Sopenharmony_ci	spin_lock(&vdata->vd_lock);
4088c2ecf20Sopenharmony_ci	ngts = gru_find_current_gts_nolock(vdata, tsid);
4098c2ecf20Sopenharmony_ci	if (ngts) {
4108c2ecf20Sopenharmony_ci		gts_drop(gts);
4118c2ecf20Sopenharmony_ci		gts = ngts;
4128c2ecf20Sopenharmony_ci		STAT(gts_double_allocate);
4138c2ecf20Sopenharmony_ci	} else {
4148c2ecf20Sopenharmony_ci		list_add(&gts->ts_next, &vdata->vd_head);
4158c2ecf20Sopenharmony_ci	}
4168c2ecf20Sopenharmony_ci	spin_unlock(&vdata->vd_lock);
4178c2ecf20Sopenharmony_ci	gru_dbg(grudev, "vma %p, gts %p\n", vma, gts);
4188c2ecf20Sopenharmony_ci	return gts;
4198c2ecf20Sopenharmony_ci}
4208c2ecf20Sopenharmony_ci
4218c2ecf20Sopenharmony_ci/*
4228c2ecf20Sopenharmony_ci * Free the GRU context assigned to the thread state.
4238c2ecf20Sopenharmony_ci */
4248c2ecf20Sopenharmony_cistatic void gru_free_gru_context(struct gru_thread_state *gts)
4258c2ecf20Sopenharmony_ci{
4268c2ecf20Sopenharmony_ci	struct gru_state *gru;
4278c2ecf20Sopenharmony_ci
4288c2ecf20Sopenharmony_ci	gru = gts->ts_gru;
4298c2ecf20Sopenharmony_ci	gru_dbg(grudev, "gts %p, gid %d\n", gts, gru->gs_gid);
4308c2ecf20Sopenharmony_ci
4318c2ecf20Sopenharmony_ci	spin_lock(&gru->gs_lock);
4328c2ecf20Sopenharmony_ci	gru->gs_gts[gts->ts_ctxnum] = NULL;
4338c2ecf20Sopenharmony_ci	free_gru_resources(gru, gts);
4348c2ecf20Sopenharmony_ci	BUG_ON(test_bit(gts->ts_ctxnum, &gru->gs_context_map) == 0);
4358c2ecf20Sopenharmony_ci	__clear_bit(gts->ts_ctxnum, &gru->gs_context_map);
4368c2ecf20Sopenharmony_ci	gts->ts_ctxnum = NULLCTX;
4378c2ecf20Sopenharmony_ci	gts->ts_gru = NULL;
4388c2ecf20Sopenharmony_ci	gts->ts_blade = -1;
4398c2ecf20Sopenharmony_ci	spin_unlock(&gru->gs_lock);
4408c2ecf20Sopenharmony_ci
4418c2ecf20Sopenharmony_ci	gts_drop(gts);
4428c2ecf20Sopenharmony_ci	STAT(free_context);
4438c2ecf20Sopenharmony_ci}
4448c2ecf20Sopenharmony_ci
/*
 * Issue prefetchw() on @num addresses starting at @p and spaced @stride
 * bytes apart. Does nothing when @num is zero or negative.
 *
 * Prefetching cachelines help hardware performance.
 * (Strictly a performance enhancement. Not functionally required).
 */
static void prefetch_data(void *p, int num, int stride)
{
	int line;

	for (line = 0; line < num; line++) {
		prefetchw(p);
		p += stride;
	}
}
4568c2ecf20Sopenharmony_ci
4578c2ecf20Sopenharmony_cistatic inline long gru_copy_handle(void *d, void *s)
4588c2ecf20Sopenharmony_ci{
4598c2ecf20Sopenharmony_ci	memcpy(d, s, GRU_HANDLE_BYTES);
4608c2ecf20Sopenharmony_ci	return GRU_HANDLE_BYTES;
4618c2ecf20Sopenharmony_ci}
4628c2ecf20Sopenharmony_ci
4638c2ecf20Sopenharmony_cistatic void gru_prefetch_context(void *gseg, void *cb, void *cbe,
4648c2ecf20Sopenharmony_ci				unsigned long cbrmap, unsigned long length)
4658c2ecf20Sopenharmony_ci{
4668c2ecf20Sopenharmony_ci	int i, scr;
4678c2ecf20Sopenharmony_ci
4688c2ecf20Sopenharmony_ci	prefetch_data(gseg + GRU_DS_BASE, length / GRU_CACHE_LINE_BYTES,
4698c2ecf20Sopenharmony_ci		      GRU_CACHE_LINE_BYTES);
4708c2ecf20Sopenharmony_ci
4718c2ecf20Sopenharmony_ci	for_each_cbr_in_allocation_map(i, &cbrmap, scr) {
4728c2ecf20Sopenharmony_ci		prefetch_data(cb, 1, GRU_CACHE_LINE_BYTES);
4738c2ecf20Sopenharmony_ci		prefetch_data(cbe + i * GRU_HANDLE_STRIDE, 1,
4748c2ecf20Sopenharmony_ci			      GRU_CACHE_LINE_BYTES);
4758c2ecf20Sopenharmony_ci		cb += GRU_HANDLE_STRIDE;
4768c2ecf20Sopenharmony_ci	}
4778c2ecf20Sopenharmony_ci}
4788c2ecf20Sopenharmony_ci
4798c2ecf20Sopenharmony_cistatic void gru_load_context_data(void *save, void *grubase, int ctxnum,
4808c2ecf20Sopenharmony_ci				  unsigned long cbrmap, unsigned long dsrmap,
4818c2ecf20Sopenharmony_ci				  int data_valid)
4828c2ecf20Sopenharmony_ci{
4838c2ecf20Sopenharmony_ci	void *gseg, *cb, *cbe;
4848c2ecf20Sopenharmony_ci	unsigned long length;
4858c2ecf20Sopenharmony_ci	int i, scr;
4868c2ecf20Sopenharmony_ci
4878c2ecf20Sopenharmony_ci	gseg = grubase + ctxnum * GRU_GSEG_STRIDE;
4888c2ecf20Sopenharmony_ci	cb = gseg + GRU_CB_BASE;
4898c2ecf20Sopenharmony_ci	cbe = grubase + GRU_CBE_BASE;
4908c2ecf20Sopenharmony_ci	length = hweight64(dsrmap) * GRU_DSR_AU_BYTES;
4918c2ecf20Sopenharmony_ci	gru_prefetch_context(gseg, cb, cbe, cbrmap, length);
4928c2ecf20Sopenharmony_ci
4938c2ecf20Sopenharmony_ci	for_each_cbr_in_allocation_map(i, &cbrmap, scr) {
4948c2ecf20Sopenharmony_ci		if (data_valid) {
4958c2ecf20Sopenharmony_ci			save += gru_copy_handle(cb, save);
4968c2ecf20Sopenharmony_ci			save += gru_copy_handle(cbe + i * GRU_HANDLE_STRIDE,
4978c2ecf20Sopenharmony_ci						save);
4988c2ecf20Sopenharmony_ci		} else {
4998c2ecf20Sopenharmony_ci			memset(cb, 0, GRU_CACHE_LINE_BYTES);
5008c2ecf20Sopenharmony_ci			memset(cbe + i * GRU_HANDLE_STRIDE, 0,
5018c2ecf20Sopenharmony_ci						GRU_CACHE_LINE_BYTES);
5028c2ecf20Sopenharmony_ci		}
5038c2ecf20Sopenharmony_ci		/* Flush CBE to hide race in context restart */
5048c2ecf20Sopenharmony_ci		mb();
5058c2ecf20Sopenharmony_ci		gru_flush_cache(cbe + i * GRU_HANDLE_STRIDE);
5068c2ecf20Sopenharmony_ci		cb += GRU_HANDLE_STRIDE;
5078c2ecf20Sopenharmony_ci	}
5088c2ecf20Sopenharmony_ci
5098c2ecf20Sopenharmony_ci	if (data_valid)
5108c2ecf20Sopenharmony_ci		memcpy(gseg + GRU_DS_BASE, save, length);
5118c2ecf20Sopenharmony_ci	else
5128c2ecf20Sopenharmony_ci		memset(gseg + GRU_DS_BASE, 0, length);
5138c2ecf20Sopenharmony_ci}
5148c2ecf20Sopenharmony_ci
5158c2ecf20Sopenharmony_cistatic void gru_unload_context_data(void *save, void *grubase, int ctxnum,
5168c2ecf20Sopenharmony_ci				    unsigned long cbrmap, unsigned long dsrmap)
5178c2ecf20Sopenharmony_ci{
5188c2ecf20Sopenharmony_ci	void *gseg, *cb, *cbe;
5198c2ecf20Sopenharmony_ci	unsigned long length;
5208c2ecf20Sopenharmony_ci	int i, scr;
5218c2ecf20Sopenharmony_ci
5228c2ecf20Sopenharmony_ci	gseg = grubase + ctxnum * GRU_GSEG_STRIDE;
5238c2ecf20Sopenharmony_ci	cb = gseg + GRU_CB_BASE;
5248c2ecf20Sopenharmony_ci	cbe = grubase + GRU_CBE_BASE;
5258c2ecf20Sopenharmony_ci	length = hweight64(dsrmap) * GRU_DSR_AU_BYTES;
5268c2ecf20Sopenharmony_ci
5278c2ecf20Sopenharmony_ci	/* CBEs may not be coherent. Flush them from cache */
5288c2ecf20Sopenharmony_ci	for_each_cbr_in_allocation_map(i, &cbrmap, scr)
5298c2ecf20Sopenharmony_ci		gru_flush_cache(cbe + i * GRU_HANDLE_STRIDE);
5308c2ecf20Sopenharmony_ci	mb();		/* Let the CL flush complete */
5318c2ecf20Sopenharmony_ci
5328c2ecf20Sopenharmony_ci	gru_prefetch_context(gseg, cb, cbe, cbrmap, length);
5338c2ecf20Sopenharmony_ci
5348c2ecf20Sopenharmony_ci	for_each_cbr_in_allocation_map(i, &cbrmap, scr) {
5358c2ecf20Sopenharmony_ci		save += gru_copy_handle(save, cb);
5368c2ecf20Sopenharmony_ci		save += gru_copy_handle(save, cbe + i * GRU_HANDLE_STRIDE);
5378c2ecf20Sopenharmony_ci		cb += GRU_HANDLE_STRIDE;
5388c2ecf20Sopenharmony_ci	}
5398c2ecf20Sopenharmony_ci	memcpy(save, gseg + GRU_DS_BASE, length);
5408c2ecf20Sopenharmony_ci}
5418c2ecf20Sopenharmony_ci
5428c2ecf20Sopenharmony_civoid gru_unload_context(struct gru_thread_state *gts, int savestate)
5438c2ecf20Sopenharmony_ci{
5448c2ecf20Sopenharmony_ci	struct gru_state *gru = gts->ts_gru;
5458c2ecf20Sopenharmony_ci	struct gru_context_configuration_handle *cch;
5468c2ecf20Sopenharmony_ci	int ctxnum = gts->ts_ctxnum;
5478c2ecf20Sopenharmony_ci
5488c2ecf20Sopenharmony_ci	if (!is_kernel_context(gts))
5498c2ecf20Sopenharmony_ci		zap_vma_ptes(gts->ts_vma, UGRUADDR(gts), GRU_GSEG_PAGESIZE);
5508c2ecf20Sopenharmony_ci	cch = get_cch(gru->gs_gru_base_vaddr, ctxnum);
5518c2ecf20Sopenharmony_ci
5528c2ecf20Sopenharmony_ci	gru_dbg(grudev, "gts %p, cbrmap 0x%lx, dsrmap 0x%lx\n",
5538c2ecf20Sopenharmony_ci		gts, gts->ts_cbr_map, gts->ts_dsr_map);
5548c2ecf20Sopenharmony_ci	lock_cch_handle(cch);
5558c2ecf20Sopenharmony_ci	if (cch_interrupt_sync(cch))
5568c2ecf20Sopenharmony_ci		BUG();
5578c2ecf20Sopenharmony_ci
5588c2ecf20Sopenharmony_ci	if (!is_kernel_context(gts))
5598c2ecf20Sopenharmony_ci		gru_unload_mm_tracker(gru, gts);
5608c2ecf20Sopenharmony_ci	if (savestate) {
5618c2ecf20Sopenharmony_ci		gru_unload_context_data(gts->ts_gdata, gru->gs_gru_base_vaddr,
5628c2ecf20Sopenharmony_ci					ctxnum, gts->ts_cbr_map,
5638c2ecf20Sopenharmony_ci					gts->ts_dsr_map);
5648c2ecf20Sopenharmony_ci		gts->ts_data_valid = 1;
5658c2ecf20Sopenharmony_ci	}
5668c2ecf20Sopenharmony_ci
5678c2ecf20Sopenharmony_ci	if (cch_deallocate(cch))
5688c2ecf20Sopenharmony_ci		BUG();
5698c2ecf20Sopenharmony_ci	unlock_cch_handle(cch);
5708c2ecf20Sopenharmony_ci
5718c2ecf20Sopenharmony_ci	gru_free_gru_context(gts);
5728c2ecf20Sopenharmony_ci}
5738c2ecf20Sopenharmony_ci
5748c2ecf20Sopenharmony_ci/*
5758c2ecf20Sopenharmony_ci * Load a GRU context by copying it from the thread data structure in memory
5768c2ecf20Sopenharmony_ci * to the GRU.
5778c2ecf20Sopenharmony_ci */
5788c2ecf20Sopenharmony_civoid gru_load_context(struct gru_thread_state *gts)
5798c2ecf20Sopenharmony_ci{
5808c2ecf20Sopenharmony_ci	struct gru_state *gru = gts->ts_gru;
5818c2ecf20Sopenharmony_ci	struct gru_context_configuration_handle *cch;
5828c2ecf20Sopenharmony_ci	int i, err, asid, ctxnum = gts->ts_ctxnum;
5838c2ecf20Sopenharmony_ci
5848c2ecf20Sopenharmony_ci	cch = get_cch(gru->gs_gru_base_vaddr, ctxnum);
5858c2ecf20Sopenharmony_ci	lock_cch_handle(cch);
5868c2ecf20Sopenharmony_ci	cch->tfm_fault_bit_enable =
5878c2ecf20Sopenharmony_ci	    (gts->ts_user_options == GRU_OPT_MISS_FMM_POLL
5888c2ecf20Sopenharmony_ci	     || gts->ts_user_options == GRU_OPT_MISS_FMM_INTR);
5898c2ecf20Sopenharmony_ci	cch->tlb_int_enable = (gts->ts_user_options == GRU_OPT_MISS_FMM_INTR);
5908c2ecf20Sopenharmony_ci	if (cch->tlb_int_enable) {
5918c2ecf20Sopenharmony_ci		gts->ts_tlb_int_select = gru_cpu_fault_map_id();
5928c2ecf20Sopenharmony_ci		cch->tlb_int_select = gts->ts_tlb_int_select;
5938c2ecf20Sopenharmony_ci	}
5948c2ecf20Sopenharmony_ci	if (gts->ts_cch_req_slice >= 0) {
5958c2ecf20Sopenharmony_ci		cch->req_slice_set_enable = 1;
5968c2ecf20Sopenharmony_ci		cch->req_slice = gts->ts_cch_req_slice;
5978c2ecf20Sopenharmony_ci	} else {
5988c2ecf20Sopenharmony_ci		cch->req_slice_set_enable =0;
5998c2ecf20Sopenharmony_ci	}
6008c2ecf20Sopenharmony_ci	cch->tfm_done_bit_enable = 0;
6018c2ecf20Sopenharmony_ci	cch->dsr_allocation_map = gts->ts_dsr_map;
6028c2ecf20Sopenharmony_ci	cch->cbr_allocation_map = gts->ts_cbr_map;
6038c2ecf20Sopenharmony_ci
6048c2ecf20Sopenharmony_ci	if (is_kernel_context(gts)) {
6058c2ecf20Sopenharmony_ci		cch->unmap_enable = 1;
6068c2ecf20Sopenharmony_ci		cch->tfm_done_bit_enable = 1;
6078c2ecf20Sopenharmony_ci		cch->cb_int_enable = 1;
6088c2ecf20Sopenharmony_ci		cch->tlb_int_select = 0;	/* For now, ints go to cpu 0 */
6098c2ecf20Sopenharmony_ci	} else {
6108c2ecf20Sopenharmony_ci		cch->unmap_enable = 0;
6118c2ecf20Sopenharmony_ci		cch->tfm_done_bit_enable = 0;
6128c2ecf20Sopenharmony_ci		cch->cb_int_enable = 0;
6138c2ecf20Sopenharmony_ci		asid = gru_load_mm_tracker(gru, gts);
6148c2ecf20Sopenharmony_ci		for (i = 0; i < 8; i++) {
6158c2ecf20Sopenharmony_ci			cch->asid[i] = asid + i;
6168c2ecf20Sopenharmony_ci			cch->sizeavail[i] = gts->ts_sizeavail;
6178c2ecf20Sopenharmony_ci		}
6188c2ecf20Sopenharmony_ci	}
6198c2ecf20Sopenharmony_ci
6208c2ecf20Sopenharmony_ci	err = cch_allocate(cch);
6218c2ecf20Sopenharmony_ci	if (err) {
6228c2ecf20Sopenharmony_ci		gru_dbg(grudev,
6238c2ecf20Sopenharmony_ci			"err %d: cch %p, gts %p, cbr 0x%lx, dsr 0x%lx\n",
6248c2ecf20Sopenharmony_ci			err, cch, gts, gts->ts_cbr_map, gts->ts_dsr_map);
6258c2ecf20Sopenharmony_ci		BUG();
6268c2ecf20Sopenharmony_ci	}
6278c2ecf20Sopenharmony_ci
6288c2ecf20Sopenharmony_ci	gru_load_context_data(gts->ts_gdata, gru->gs_gru_base_vaddr, ctxnum,
6298c2ecf20Sopenharmony_ci			gts->ts_cbr_map, gts->ts_dsr_map, gts->ts_data_valid);
6308c2ecf20Sopenharmony_ci
6318c2ecf20Sopenharmony_ci	if (cch_start(cch))
6328c2ecf20Sopenharmony_ci		BUG();
6338c2ecf20Sopenharmony_ci	unlock_cch_handle(cch);
6348c2ecf20Sopenharmony_ci
6358c2ecf20Sopenharmony_ci	gru_dbg(grudev, "gid %d, gts %p, cbrmap 0x%lx, dsrmap 0x%lx, tie %d, tis %d\n",
6368c2ecf20Sopenharmony_ci		gts->ts_gru->gs_gid, gts, gts->ts_cbr_map, gts->ts_dsr_map,
6378c2ecf20Sopenharmony_ci		(gts->ts_user_options == GRU_OPT_MISS_FMM_INTR), gts->ts_tlb_int_select);
6388c2ecf20Sopenharmony_ci}
6398c2ecf20Sopenharmony_ci
6408c2ecf20Sopenharmony_ci/*
6418c2ecf20Sopenharmony_ci * Update fields in an active CCH:
6428c2ecf20Sopenharmony_ci * 	- retarget interrupts on local blade
6438c2ecf20Sopenharmony_ci * 	- update sizeavail mask
6448c2ecf20Sopenharmony_ci */
6458c2ecf20Sopenharmony_ciint gru_update_cch(struct gru_thread_state *gts)
6468c2ecf20Sopenharmony_ci{
6478c2ecf20Sopenharmony_ci	struct gru_context_configuration_handle *cch;
6488c2ecf20Sopenharmony_ci	struct gru_state *gru = gts->ts_gru;
6498c2ecf20Sopenharmony_ci	int i, ctxnum = gts->ts_ctxnum, ret = 0;
6508c2ecf20Sopenharmony_ci
6518c2ecf20Sopenharmony_ci	cch = get_cch(gru->gs_gru_base_vaddr, ctxnum);
6528c2ecf20Sopenharmony_ci
6538c2ecf20Sopenharmony_ci	lock_cch_handle(cch);
6548c2ecf20Sopenharmony_ci	if (cch->state == CCHSTATE_ACTIVE) {
6558c2ecf20Sopenharmony_ci		if (gru->gs_gts[gts->ts_ctxnum] != gts)
6568c2ecf20Sopenharmony_ci			goto exit;
6578c2ecf20Sopenharmony_ci		if (cch_interrupt(cch))
6588c2ecf20Sopenharmony_ci			BUG();
6598c2ecf20Sopenharmony_ci		for (i = 0; i < 8; i++)
6608c2ecf20Sopenharmony_ci			cch->sizeavail[i] = gts->ts_sizeavail;
6618c2ecf20Sopenharmony_ci		gts->ts_tlb_int_select = gru_cpu_fault_map_id();
6628c2ecf20Sopenharmony_ci		cch->tlb_int_select = gru_cpu_fault_map_id();
6638c2ecf20Sopenharmony_ci		cch->tfm_fault_bit_enable =
6648c2ecf20Sopenharmony_ci		  (gts->ts_user_options == GRU_OPT_MISS_FMM_POLL
6658c2ecf20Sopenharmony_ci		    || gts->ts_user_options == GRU_OPT_MISS_FMM_INTR);
6668c2ecf20Sopenharmony_ci		if (cch_start(cch))
6678c2ecf20Sopenharmony_ci			BUG();
6688c2ecf20Sopenharmony_ci		ret = 1;
6698c2ecf20Sopenharmony_ci	}
6708c2ecf20Sopenharmony_ciexit:
6718c2ecf20Sopenharmony_ci	unlock_cch_handle(cch);
6728c2ecf20Sopenharmony_ci	return ret;
6738c2ecf20Sopenharmony_ci}
6748c2ecf20Sopenharmony_ci
6758c2ecf20Sopenharmony_ci/*
6768c2ecf20Sopenharmony_ci * Update CCH tlb interrupt select. Required when all the following is true:
6778c2ecf20Sopenharmony_ci * 	- task's GRU context is loaded into a GRU
6788c2ecf20Sopenharmony_ci * 	- task is using interrupt notification for TLB faults
6798c2ecf20Sopenharmony_ci * 	- task has migrated to a different cpu on the same blade where
6808c2ecf20Sopenharmony_ci * 	  it was previously running.
6818c2ecf20Sopenharmony_ci */
6828c2ecf20Sopenharmony_cistatic int gru_retarget_intr(struct gru_thread_state *gts)
6838c2ecf20Sopenharmony_ci{
6848c2ecf20Sopenharmony_ci	if (gts->ts_tlb_int_select < 0
6858c2ecf20Sopenharmony_ci	    || gts->ts_tlb_int_select == gru_cpu_fault_map_id())
6868c2ecf20Sopenharmony_ci		return 0;
6878c2ecf20Sopenharmony_ci
6888c2ecf20Sopenharmony_ci	gru_dbg(grudev, "retarget from %d to %d\n", gts->ts_tlb_int_select,
6898c2ecf20Sopenharmony_ci		gru_cpu_fault_map_id());
6908c2ecf20Sopenharmony_ci	return gru_update_cch(gts);
6918c2ecf20Sopenharmony_ci}
6928c2ecf20Sopenharmony_ci
6938c2ecf20Sopenharmony_ci/*
6948c2ecf20Sopenharmony_ci * Check if a GRU context is allowed to use a specific chiplet. By default
6958c2ecf20Sopenharmony_ci * a context is assigned to any blade-local chiplet. However, users can
6968c2ecf20Sopenharmony_ci * override this.
6978c2ecf20Sopenharmony_ci * 	Returns 1 if assignment allowed, 0 otherwise
6988c2ecf20Sopenharmony_ci */
6998c2ecf20Sopenharmony_cistatic int gru_check_chiplet_assignment(struct gru_state *gru,
7008c2ecf20Sopenharmony_ci					struct gru_thread_state *gts)
7018c2ecf20Sopenharmony_ci{
7028c2ecf20Sopenharmony_ci	int blade_id;
7038c2ecf20Sopenharmony_ci	int chiplet_id;
7048c2ecf20Sopenharmony_ci
7058c2ecf20Sopenharmony_ci	blade_id = gts->ts_user_blade_id;
7068c2ecf20Sopenharmony_ci	if (blade_id < 0)
7078c2ecf20Sopenharmony_ci		blade_id = uv_numa_blade_id();
7088c2ecf20Sopenharmony_ci
7098c2ecf20Sopenharmony_ci	chiplet_id = gts->ts_user_chiplet_id;
7108c2ecf20Sopenharmony_ci	return gru->gs_blade_id == blade_id &&
7118c2ecf20Sopenharmony_ci		(chiplet_id < 0 || chiplet_id == gru->gs_chiplet_id);
7128c2ecf20Sopenharmony_ci}
7138c2ecf20Sopenharmony_ci
7148c2ecf20Sopenharmony_ci/*
7158c2ecf20Sopenharmony_ci * Unload the gru context if it is not assigned to the correct blade or
7168c2ecf20Sopenharmony_ci * chiplet. Misassignment can occur if the process migrates to a different
7178c2ecf20Sopenharmony_ci * blade or if the user changes the selected blade/chiplet.
7188c2ecf20Sopenharmony_ci */
7198c2ecf20Sopenharmony_ciint gru_check_context_placement(struct gru_thread_state *gts)
7208c2ecf20Sopenharmony_ci{
7218c2ecf20Sopenharmony_ci	struct gru_state *gru;
7228c2ecf20Sopenharmony_ci	int ret = 0;
7238c2ecf20Sopenharmony_ci
7248c2ecf20Sopenharmony_ci	/*
7258c2ecf20Sopenharmony_ci	 * If the current task is the context owner, verify that the
7268c2ecf20Sopenharmony_ci	 * context is correctly placed. This test is skipped for non-owner
7278c2ecf20Sopenharmony_ci	 * references. Pthread apps use non-owner references to the CBRs.
7288c2ecf20Sopenharmony_ci	 */
7298c2ecf20Sopenharmony_ci	gru = gts->ts_gru;
7308c2ecf20Sopenharmony_ci	/*
7318c2ecf20Sopenharmony_ci	 * If gru or gts->ts_tgid_owner isn't initialized properly, return
7328c2ecf20Sopenharmony_ci	 * success to indicate that the caller does not need to unload the
7338c2ecf20Sopenharmony_ci	 * gru context.The caller is responsible for their inspection and
7348c2ecf20Sopenharmony_ci	 * reinitialization if needed.
7358c2ecf20Sopenharmony_ci	 */
7368c2ecf20Sopenharmony_ci	if (!gru || gts->ts_tgid_owner != current->tgid)
7378c2ecf20Sopenharmony_ci		return ret;
7388c2ecf20Sopenharmony_ci
7398c2ecf20Sopenharmony_ci	if (!gru_check_chiplet_assignment(gru, gts)) {
7408c2ecf20Sopenharmony_ci		STAT(check_context_unload);
7418c2ecf20Sopenharmony_ci		ret = -EINVAL;
7428c2ecf20Sopenharmony_ci	} else if (gru_retarget_intr(gts)) {
7438c2ecf20Sopenharmony_ci		STAT(check_context_retarget_intr);
7448c2ecf20Sopenharmony_ci	}
7458c2ecf20Sopenharmony_ci
7468c2ecf20Sopenharmony_ci	return ret;
7478c2ecf20Sopenharmony_ci}
7488c2ecf20Sopenharmony_ci
7498c2ecf20Sopenharmony_ci
7508c2ecf20Sopenharmony_ci/*
7518c2ecf20Sopenharmony_ci * Insufficient GRU resources available on the local blade. Steal a context from
7528c2ecf20Sopenharmony_ci * a process. This is a hack until a _real_ resource scheduler is written....
7538c2ecf20Sopenharmony_ci */
/*
 * Round-robin successors used by the context-steal scan.
 * next_ctxnum() yields 0 once n reaches GRU_NUM_CCH - 2; next_gru() wraps
 * from the last chiplet of blade b back to the first. Both macros evaluate
 * their arguments more than once.
 */
#define next_ctxnum(n)	((n) >= GRU_NUM_CCH - 2 ? 0 : (n) + 1)
#define next_gru(b, g)	(((g) >= &(b)->bs_grus[GRU_CHIPLETS_PER_BLADE - 1]) ? \
				 &(b)->bs_grus[0] : ((g) + 1))
7578c2ecf20Sopenharmony_ci
7588c2ecf20Sopenharmony_cistatic int is_gts_stealable(struct gru_thread_state *gts,
7598c2ecf20Sopenharmony_ci		struct gru_blade_state *bs)
7608c2ecf20Sopenharmony_ci{
7618c2ecf20Sopenharmony_ci	if (is_kernel_context(gts))
7628c2ecf20Sopenharmony_ci		return down_write_trylock(&bs->bs_kgts_sema);
7638c2ecf20Sopenharmony_ci	else
7648c2ecf20Sopenharmony_ci		return mutex_trylock(&gts->ts_ctxlock);
7658c2ecf20Sopenharmony_ci}
7668c2ecf20Sopenharmony_ci
7678c2ecf20Sopenharmony_cistatic void gts_stolen(struct gru_thread_state *gts,
7688c2ecf20Sopenharmony_ci		struct gru_blade_state *bs)
7698c2ecf20Sopenharmony_ci{
7708c2ecf20Sopenharmony_ci	if (is_kernel_context(gts)) {
7718c2ecf20Sopenharmony_ci		up_write(&bs->bs_kgts_sema);
7728c2ecf20Sopenharmony_ci		STAT(steal_kernel_context);
7738c2ecf20Sopenharmony_ci	} else {
7748c2ecf20Sopenharmony_ci		mutex_unlock(&gts->ts_ctxlock);
7758c2ecf20Sopenharmony_ci		STAT(steal_user_context);
7768c2ecf20Sopenharmony_ci	}
7778c2ecf20Sopenharmony_ci}
7788c2ecf20Sopenharmony_ci
7798c2ecf20Sopenharmony_civoid gru_steal_context(struct gru_thread_state *gts)
7808c2ecf20Sopenharmony_ci{
7818c2ecf20Sopenharmony_ci	struct gru_blade_state *blade;
7828c2ecf20Sopenharmony_ci	struct gru_state *gru, *gru0;
7838c2ecf20Sopenharmony_ci	struct gru_thread_state *ngts = NULL;
7848c2ecf20Sopenharmony_ci	int ctxnum, ctxnum0, flag = 0, cbr, dsr;
7858c2ecf20Sopenharmony_ci	int blade_id;
7868c2ecf20Sopenharmony_ci
7878c2ecf20Sopenharmony_ci	blade_id = gts->ts_user_blade_id;
7888c2ecf20Sopenharmony_ci	if (blade_id < 0)
7898c2ecf20Sopenharmony_ci		blade_id = uv_numa_blade_id();
7908c2ecf20Sopenharmony_ci	cbr = gts->ts_cbr_au_count;
7918c2ecf20Sopenharmony_ci	dsr = gts->ts_dsr_au_count;
7928c2ecf20Sopenharmony_ci
7938c2ecf20Sopenharmony_ci	blade = gru_base[blade_id];
7948c2ecf20Sopenharmony_ci	spin_lock(&blade->bs_lock);
7958c2ecf20Sopenharmony_ci
7968c2ecf20Sopenharmony_ci	ctxnum = next_ctxnum(blade->bs_lru_ctxnum);
7978c2ecf20Sopenharmony_ci	gru = blade->bs_lru_gru;
7988c2ecf20Sopenharmony_ci	if (ctxnum == 0)
7998c2ecf20Sopenharmony_ci		gru = next_gru(blade, gru);
8008c2ecf20Sopenharmony_ci	blade->bs_lru_gru = gru;
8018c2ecf20Sopenharmony_ci	blade->bs_lru_ctxnum = ctxnum;
8028c2ecf20Sopenharmony_ci	ctxnum0 = ctxnum;
8038c2ecf20Sopenharmony_ci	gru0 = gru;
8048c2ecf20Sopenharmony_ci	while (1) {
8058c2ecf20Sopenharmony_ci		if (gru_check_chiplet_assignment(gru, gts)) {
8068c2ecf20Sopenharmony_ci			if (check_gru_resources(gru, cbr, dsr, GRU_NUM_CCH))
8078c2ecf20Sopenharmony_ci				break;
8088c2ecf20Sopenharmony_ci			spin_lock(&gru->gs_lock);
8098c2ecf20Sopenharmony_ci			for (; ctxnum < GRU_NUM_CCH; ctxnum++) {
8108c2ecf20Sopenharmony_ci				if (flag && gru == gru0 && ctxnum == ctxnum0)
8118c2ecf20Sopenharmony_ci					break;
8128c2ecf20Sopenharmony_ci				ngts = gru->gs_gts[ctxnum];
8138c2ecf20Sopenharmony_ci				/*
8148c2ecf20Sopenharmony_ci			 	* We are grabbing locks out of order, so trylock is
8158c2ecf20Sopenharmony_ci			 	* needed. GTSs are usually not locked, so the odds of
8168c2ecf20Sopenharmony_ci			 	* success are high. If trylock fails, try to steal a
8178c2ecf20Sopenharmony_ci			 	* different GSEG.
8188c2ecf20Sopenharmony_ci			 	*/
8198c2ecf20Sopenharmony_ci				if (ngts && is_gts_stealable(ngts, blade))
8208c2ecf20Sopenharmony_ci					break;
8218c2ecf20Sopenharmony_ci				ngts = NULL;
8228c2ecf20Sopenharmony_ci			}
8238c2ecf20Sopenharmony_ci			spin_unlock(&gru->gs_lock);
8248c2ecf20Sopenharmony_ci			if (ngts || (flag && gru == gru0 && ctxnum == ctxnum0))
8258c2ecf20Sopenharmony_ci				break;
8268c2ecf20Sopenharmony_ci		}
8278c2ecf20Sopenharmony_ci		if (flag && gru == gru0)
8288c2ecf20Sopenharmony_ci			break;
8298c2ecf20Sopenharmony_ci		flag = 1;
8308c2ecf20Sopenharmony_ci		ctxnum = 0;
8318c2ecf20Sopenharmony_ci		gru = next_gru(blade, gru);
8328c2ecf20Sopenharmony_ci	}
8338c2ecf20Sopenharmony_ci	spin_unlock(&blade->bs_lock);
8348c2ecf20Sopenharmony_ci
8358c2ecf20Sopenharmony_ci	if (ngts) {
8368c2ecf20Sopenharmony_ci		gts->ustats.context_stolen++;
8378c2ecf20Sopenharmony_ci		ngts->ts_steal_jiffies = jiffies;
8388c2ecf20Sopenharmony_ci		gru_unload_context(ngts, is_kernel_context(ngts) ? 0 : 1);
8398c2ecf20Sopenharmony_ci		gts_stolen(ngts, blade);
8408c2ecf20Sopenharmony_ci	} else {
8418c2ecf20Sopenharmony_ci		STAT(steal_context_failed);
8428c2ecf20Sopenharmony_ci	}
8438c2ecf20Sopenharmony_ci	gru_dbg(grudev,
8448c2ecf20Sopenharmony_ci		"stole gid %d, ctxnum %d from gts %p. Need cb %d, ds %d;"
8458c2ecf20Sopenharmony_ci		" avail cb %ld, ds %ld\n",
8468c2ecf20Sopenharmony_ci		gru->gs_gid, ctxnum, ngts, cbr, dsr, hweight64(gru->gs_cbr_map),
8478c2ecf20Sopenharmony_ci		hweight64(gru->gs_dsr_map));
8488c2ecf20Sopenharmony_ci}
8498c2ecf20Sopenharmony_ci
8508c2ecf20Sopenharmony_ci/*
8518c2ecf20Sopenharmony_ci * Assign a gru context.
8528c2ecf20Sopenharmony_ci */
8538c2ecf20Sopenharmony_cistatic int gru_assign_context_number(struct gru_state *gru)
8548c2ecf20Sopenharmony_ci{
8558c2ecf20Sopenharmony_ci	int ctxnum;
8568c2ecf20Sopenharmony_ci
8578c2ecf20Sopenharmony_ci	ctxnum = find_first_zero_bit(&gru->gs_context_map, GRU_NUM_CCH);
8588c2ecf20Sopenharmony_ci	__set_bit(ctxnum, &gru->gs_context_map);
8598c2ecf20Sopenharmony_ci	return ctxnum;
8608c2ecf20Sopenharmony_ci}
8618c2ecf20Sopenharmony_ci
8628c2ecf20Sopenharmony_ci/*
8638c2ecf20Sopenharmony_ci * Scan the GRUs on the local blade & assign a GRU context.
8648c2ecf20Sopenharmony_ci */
8658c2ecf20Sopenharmony_cistruct gru_state *gru_assign_gru_context(struct gru_thread_state *gts)
8668c2ecf20Sopenharmony_ci{
8678c2ecf20Sopenharmony_ci	struct gru_state *gru, *grux;
8688c2ecf20Sopenharmony_ci	int i, max_active_contexts;
8698c2ecf20Sopenharmony_ci	int blade_id = gts->ts_user_blade_id;
8708c2ecf20Sopenharmony_ci
8718c2ecf20Sopenharmony_ci	if (blade_id < 0)
8728c2ecf20Sopenharmony_ci		blade_id = uv_numa_blade_id();
8738c2ecf20Sopenharmony_ciagain:
8748c2ecf20Sopenharmony_ci	gru = NULL;
8758c2ecf20Sopenharmony_ci	max_active_contexts = GRU_NUM_CCH;
8768c2ecf20Sopenharmony_ci	for_each_gru_on_blade(grux, blade_id, i) {
8778c2ecf20Sopenharmony_ci		if (!gru_check_chiplet_assignment(grux, gts))
8788c2ecf20Sopenharmony_ci			continue;
8798c2ecf20Sopenharmony_ci		if (check_gru_resources(grux, gts->ts_cbr_au_count,
8808c2ecf20Sopenharmony_ci					gts->ts_dsr_au_count,
8818c2ecf20Sopenharmony_ci					max_active_contexts)) {
8828c2ecf20Sopenharmony_ci			gru = grux;
8838c2ecf20Sopenharmony_ci			max_active_contexts = grux->gs_active_contexts;
8848c2ecf20Sopenharmony_ci			if (max_active_contexts == 0)
8858c2ecf20Sopenharmony_ci				break;
8868c2ecf20Sopenharmony_ci		}
8878c2ecf20Sopenharmony_ci	}
8888c2ecf20Sopenharmony_ci
8898c2ecf20Sopenharmony_ci	if (gru) {
8908c2ecf20Sopenharmony_ci		spin_lock(&gru->gs_lock);
8918c2ecf20Sopenharmony_ci		if (!check_gru_resources(gru, gts->ts_cbr_au_count,
8928c2ecf20Sopenharmony_ci					 gts->ts_dsr_au_count, GRU_NUM_CCH)) {
8938c2ecf20Sopenharmony_ci			spin_unlock(&gru->gs_lock);
8948c2ecf20Sopenharmony_ci			goto again;
8958c2ecf20Sopenharmony_ci		}
8968c2ecf20Sopenharmony_ci		reserve_gru_resources(gru, gts);
8978c2ecf20Sopenharmony_ci		gts->ts_gru = gru;
8988c2ecf20Sopenharmony_ci		gts->ts_blade = gru->gs_blade_id;
8998c2ecf20Sopenharmony_ci		gts->ts_ctxnum = gru_assign_context_number(gru);
9008c2ecf20Sopenharmony_ci		atomic_inc(&gts->ts_refcnt);
9018c2ecf20Sopenharmony_ci		gru->gs_gts[gts->ts_ctxnum] = gts;
9028c2ecf20Sopenharmony_ci		spin_unlock(&gru->gs_lock);
9038c2ecf20Sopenharmony_ci
9048c2ecf20Sopenharmony_ci		STAT(assign_context);
9058c2ecf20Sopenharmony_ci		gru_dbg(grudev,
9068c2ecf20Sopenharmony_ci			"gseg %p, gts %p, gid %d, ctx %d, cbr %d, dsr %d\n",
9078c2ecf20Sopenharmony_ci			gseg_virtual_address(gts->ts_gru, gts->ts_ctxnum), gts,
9088c2ecf20Sopenharmony_ci			gts->ts_gru->gs_gid, gts->ts_ctxnum,
9098c2ecf20Sopenharmony_ci			gts->ts_cbr_au_count, gts->ts_dsr_au_count);
9108c2ecf20Sopenharmony_ci	} else {
9118c2ecf20Sopenharmony_ci		gru_dbg(grudev, "failed to allocate a GTS %s\n", "");
9128c2ecf20Sopenharmony_ci		STAT(assign_context_failed);
9138c2ecf20Sopenharmony_ci	}
9148c2ecf20Sopenharmony_ci
9158c2ecf20Sopenharmony_ci	return gru;
9168c2ecf20Sopenharmony_ci}
9178c2ecf20Sopenharmony_ci
9188c2ecf20Sopenharmony_ci/*
9198c2ecf20Sopenharmony_ci * gru_nopage
9208c2ecf20Sopenharmony_ci *
9218c2ecf20Sopenharmony_ci * Map the user's GRU segment
9228c2ecf20Sopenharmony_ci *
9238c2ecf20Sopenharmony_ci * 	Note: gru segments alway mmaped on GRU_GSEG_PAGESIZE boundaries.
9248c2ecf20Sopenharmony_ci */
9258c2ecf20Sopenharmony_civm_fault_t gru_fault(struct vm_fault *vmf)
9268c2ecf20Sopenharmony_ci{
9278c2ecf20Sopenharmony_ci	struct vm_area_struct *vma = vmf->vma;
9288c2ecf20Sopenharmony_ci	struct gru_thread_state *gts;
9298c2ecf20Sopenharmony_ci	unsigned long paddr, vaddr;
9308c2ecf20Sopenharmony_ci	unsigned long expires;
9318c2ecf20Sopenharmony_ci
9328c2ecf20Sopenharmony_ci	vaddr = vmf->address;
9338c2ecf20Sopenharmony_ci	gru_dbg(grudev, "vma %p, vaddr 0x%lx (0x%lx)\n",
9348c2ecf20Sopenharmony_ci		vma, vaddr, GSEG_BASE(vaddr));
9358c2ecf20Sopenharmony_ci	STAT(nopfn);
9368c2ecf20Sopenharmony_ci
9378c2ecf20Sopenharmony_ci	/* The following check ensures vaddr is a valid address in the VMA */
9388c2ecf20Sopenharmony_ci	gts = gru_find_thread_state(vma, TSID(vaddr, vma));
9398c2ecf20Sopenharmony_ci	if (!gts)
9408c2ecf20Sopenharmony_ci		return VM_FAULT_SIGBUS;
9418c2ecf20Sopenharmony_ci
9428c2ecf20Sopenharmony_ciagain:
9438c2ecf20Sopenharmony_ci	mutex_lock(&gts->ts_ctxlock);
9448c2ecf20Sopenharmony_ci	preempt_disable();
9458c2ecf20Sopenharmony_ci
9468c2ecf20Sopenharmony_ci	if (gru_check_context_placement(gts)) {
9478c2ecf20Sopenharmony_ci		preempt_enable();
9488c2ecf20Sopenharmony_ci		mutex_unlock(&gts->ts_ctxlock);
9498c2ecf20Sopenharmony_ci		gru_unload_context(gts, 1);
9508c2ecf20Sopenharmony_ci		return VM_FAULT_NOPAGE;
9518c2ecf20Sopenharmony_ci	}
9528c2ecf20Sopenharmony_ci
9538c2ecf20Sopenharmony_ci	if (!gts->ts_gru) {
9548c2ecf20Sopenharmony_ci		STAT(load_user_context);
9558c2ecf20Sopenharmony_ci		if (!gru_assign_gru_context(gts)) {
9568c2ecf20Sopenharmony_ci			preempt_enable();
9578c2ecf20Sopenharmony_ci			mutex_unlock(&gts->ts_ctxlock);
9588c2ecf20Sopenharmony_ci			set_current_state(TASK_INTERRUPTIBLE);
9598c2ecf20Sopenharmony_ci			schedule_timeout(GRU_ASSIGN_DELAY);  /* true hack ZZZ */
9608c2ecf20Sopenharmony_ci			expires = gts->ts_steal_jiffies + GRU_STEAL_DELAY;
9618c2ecf20Sopenharmony_ci			if (time_before(expires, jiffies))
9628c2ecf20Sopenharmony_ci				gru_steal_context(gts);
9638c2ecf20Sopenharmony_ci			goto again;
9648c2ecf20Sopenharmony_ci		}
9658c2ecf20Sopenharmony_ci		gru_load_context(gts);
9668c2ecf20Sopenharmony_ci		paddr = gseg_physical_address(gts->ts_gru, gts->ts_ctxnum);
9678c2ecf20Sopenharmony_ci		remap_pfn_range(vma, vaddr & ~(GRU_GSEG_PAGESIZE - 1),
9688c2ecf20Sopenharmony_ci				paddr >> PAGE_SHIFT, GRU_GSEG_PAGESIZE,
9698c2ecf20Sopenharmony_ci				vma->vm_page_prot);
9708c2ecf20Sopenharmony_ci	}
9718c2ecf20Sopenharmony_ci
9728c2ecf20Sopenharmony_ci	preempt_enable();
9738c2ecf20Sopenharmony_ci	mutex_unlock(&gts->ts_ctxlock);
9748c2ecf20Sopenharmony_ci
9758c2ecf20Sopenharmony_ci	return VM_FAULT_NOPAGE;
9768c2ecf20Sopenharmony_ci}
9778c2ecf20Sopenharmony_ci
978