/*
 * Copyright 2014 Advanced Micro Devices, Inc.
 * Copyright 2008 Red Hat Inc.
 * Copyright 2009 Jerome Glisse.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
 * OTHER DEALINGS IN THE SOFTWARE.
 *
 */

#include "amdgpu.h"
#include "amdgpu_gfx.h"
#include "amdgpu_rlc.h"
#include "amdgpu_ras.h"

/* delay 0.1 second to enable gfx off feature */
#define GFX_OFF_DELAY_ENABLE         msecs_to_jiffies(100)

/*
 * GPU GFX IP block helper functions.
 */

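/**
 * amdgpu_gfx_mec_queue_to_bit - Convert a (mec, pipe, queue) tuple to a flat bit index
 *
 * @adev: amdgpu_device pointer
 * @mec: MEC index
 * @pipe: pipe index within the MEC
 * @queue: queue index within the pipe
 *
 * Returns the position of the given compute queue in the per-device
 * MEC queue bitmap (adev->gfx.mec.queue_bitmap).
 */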
int amdgpu_gfx_mec_queue_to_bit(struct amdgpu_device *adev, int mec,
				int pipe, int queue)
{
	int bit = 0;

	bit += mec * adev->gfx.mec.num_pipe_per_mec
		* adev->gfx.mec.num_queue_per_pipe;
	bit += pipe * adev->gfx.mec.num_queue_per_pipe;
	bit += queue;

	return bit;
}

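/**
 * amdgpu_queue_mask_bit_to_mec_queue - Convert a flat bit index back to (mec, pipe, queue)
 *
 * @adev: amdgpu_device pointer
 * @bit: position in the MEC queue bitmap
 * @mec: pointer in which to store the MEC index
 * @pipe: pointer in which to store the pipe index
 * @queue: pointer in which to store the queue index
 *
 * Inverse of amdgpu_gfx_mec_queue_to_bit().
 */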
void amdgpu_queue_mask_bit_to_mec_queue(struct amdgpu_device *adev, int bit,
				 int *mec, int *pipe, int *queue)
{
	*queue = bit % adev->gfx.mec.num_queue_per_pipe;
	*pipe = (bit / adev->gfx.mec.num_queue_per_pipe)
		% adev->gfx.mec.num_pipe_per_mec;
	*mec = (bit / adev->gfx.mec.num_queue_per_pipe)
	       / adev->gfx.mec.num_pipe_per_mec;
}

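/**
 * amdgpu_gfx_is_mec_queue_enabled - Check whether a compute queue is owned by amdgpu
 *
 * @adev: amdgpu_device pointer
 * @mec: MEC index
 * @pipe: pipe index within the MEC
 * @queue: queue index within the pipe
 *
 * Returns true if the queue's bit is set in adev->gfx.mec.queue_bitmap.
 */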
bool amdgpu_gfx_is_mec_queue_enabled(struct amdgpu_device *adev,
				     int mec, int pipe, int queue)
{
	return test_bit(amdgpu_gfx_mec_queue_to_bit(adev, mec, pipe, queue),
			adev->gfx.mec.queue_bitmap);
}

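/**
 * amdgpu_gfx_me_queue_to_bit - Convert a (me, pipe, queue) tuple to a flat bit index
 *
 * @adev: amdgpu_device pointer
 * @me: ME index
 * @pipe: pipe index within the ME
 * @queue: queue index within the pipe
 *
 * Graphics (ME) analogue of amdgpu_gfx_mec_queue_to_bit().
 */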
int amdgpu_gfx_me_queue_to_bit(struct amdgpu_device *adev,
			       int me, int pipe, int queue)
{
	int bit = 0;

	bit += me * adev->gfx.me.num_pipe_per_me
		* adev->gfx.me.num_queue_per_pipe;
	bit += pipe * adev->gfx.me.num_queue_per_pipe;
	bit += queue;

	return bit;
}

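/**
 * amdgpu_gfx_bit_to_me_queue - Convert a flat bit index back to (me, pipe, queue)
 *
 * @adev: amdgpu_device pointer
 * @bit: position in the ME queue bitmap
 * @me: pointer in which to store the ME index
 * @pipe: pointer in which to store the pipe index
 * @queue: pointer in which to store the queue index
 *
 * Inverse of amdgpu_gfx_me_queue_to_bit().
 */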
void amdgpu_gfx_bit_to_me_queue(struct amdgpu_device *adev, int bit,
				int *me, int *pipe, int *queue)
{
	*queue = bit % adev->gfx.me.num_queue_per_pipe;
	*pipe = (bit / adev->gfx.me.num_queue_per_pipe)
		% adev->gfx.me.num_pipe_per_me;
	*me = (bit / adev->gfx.me.num_queue_per_pipe)
		/ adev->gfx.me.num_pipe_per_me;
}

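/**
 * amdgpu_gfx_is_me_queue_enabled - Check whether a graphics queue is owned by amdgpu
 *
 * @adev: amdgpu_device pointer
 * @me: ME index
 * @pipe: pipe index within the ME
 * @queue: queue index within the pipe
 *
 * Returns true if the queue's bit is set in adev->gfx.me.queue_bitmap.
 */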
bool amdgpu_gfx_is_me_queue_enabled(struct amdgpu_device *adev,
				    int me, int pipe, int queue)
{
	return test_bit(amdgpu_gfx_me_queue_to_bit(adev, me, pipe, queue),
			adev->gfx.me.queue_bitmap);
}

/**
 * amdgpu_gfx_scratch_get - Allocate a scratch register
 *
 * @adev: amdgpu_device pointer
 * @reg: scratch register mmio offset
 *
 * Allocate a CP scratch register for use by the driver (all asics).
 * Returns 0 on success or -EINVAL on failure.
 */
int amdgpu_gfx_scratch_get(struct amdgpu_device *adev, uint32_t *reg)
{
	int i;

	i = ffs(adev->gfx.scratch.free_mask);
	if (i != 0 && i <= adev->gfx.scratch.num_reg) {
		i--;
		adev->gfx.scratch.free_mask &= ~(1u << i);
		*reg = adev->gfx.scratch.reg_base + i;
		return 0;
	}
	return -EINVAL;
}

/**
 * amdgpu_gfx_scratch_free - Free a scratch register
 *
 * @adev: amdgpu_device pointer
 * @reg: scratch register mmio offset
 *
 * Free a CP scratch register allocated for use by the driver (all asics).
 */
void amdgpu_gfx_scratch_free(struct amdgpu_device *adev, uint32_t reg)
{
	adev->gfx.scratch.free_mask |= 1u << (reg - adev->gfx.scratch.reg_base);
}

/**
 * amdgpu_gfx_parse_disable_cu - Parse the disable_cu module parameter
 *
 * @mask: array in which the per-shader array disable masks will be stored
 * @max_se: number of SEs
 * @max_sh: number of SHs
 *
 * The bitmask of CUs to be disabled in the shader array determined by se and
 * sh is stored in mask[se * max_sh + sh].
 */
void amdgpu_gfx_parse_disable_cu(unsigned *mask, unsigned max_se, unsigned max_sh)
{
	unsigned se, sh, cu;
	const char *p;

	memset(mask, 0, sizeof(*mask) * max_se * max_sh);

	if (!amdgpu_disable_cu || !*amdgpu_disable_cu)
		return;

	p = amdgpu_disable_cu;
	for (;;) {
		char *next;
		int ret = sscanf(p, "%u.%u.%u", &se, &sh, &cu);

		if (ret < 3) {
			DRM_ERROR("amdgpu: could not parse disable_cu\n");
			return;
		}

		if (se < max_se && sh < max_sh && cu < 16) {
			DRM_INFO("amdgpu: disabling CU %u.%u.%u\n", se, sh, cu);
			mask[se * max_sh + sh] |= 1u << cu;
		} else {
			DRM_ERROR("amdgpu: disable_cu %u.%u.%u is out of range\n",
				  se, sh, cu);
		}

		next = strchr(p, ',');
		if (!next)
			break;
		p = next + 1;
	}
}

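/* Decide whether compute queues should be spread across multiple pipes,
 * honoring the amdgpu_compute_multipipe module parameter override if set.
 */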
static bool amdgpu_gfx_is_multipipe_capable(struct amdgpu_device *adev)
{
	if (amdgpu_compute_multipipe != -1) {
		DRM_INFO("amdgpu: forcing compute pipe policy %d\n",
			 amdgpu_compute_multipipe);
		return amdgpu_compute_multipipe == 1;
	}

	/* FIXME: spreading the queues across pipes causes perf regressions
	 * on POLARIS11 compute workloads */
	if (adev->asic_type == CHIP_POLARIS11)
		return false;

	return adev->gfx.mec.num_mec > 1;
}

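/**
 * amdgpu_gfx_is_high_priority_compute_queue - Check whether a compute queue is high priority
 *
 * @adev: amdgpu_device pointer
 * @pipe: pipe index
 * @queue: queue index
 *
 * With the multipipe policy, priority alternates per pipe; otherwise it
 * alternates per queue. Odd pipes/queues are treated as high priority.
 */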
bool amdgpu_gfx_is_high_priority_compute_queue(struct amdgpu_device *adev,
					       int pipe, int queue)
{
	bool multipipe_policy = amdgpu_gfx_is_multipipe_capable(adev);
	int cond;
	/* Policy: alternate between normal and high priority */
	cond = multipipe_policy ? pipe : queue;

	return ((cond % 2) != 0);
}

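/**
 * amdgpu_gfx_compute_queue_acquire - Select the compute (MEC) queues amdgpu will own
 *
 * @adev: amdgpu_device pointer
 *
 * Populates adev->gfx.mec.queue_bitmap according to the pipe policy; the
 * remaining compute queues are left for other clients (e.g. KFD).
 */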
void amdgpu_gfx_compute_queue_acquire(struct amdgpu_device *adev)
{
	int i, queue, pipe;
	bool multipipe_policy = amdgpu_gfx_is_multipipe_capable(adev);
	int max_queues_per_mec = min(adev->gfx.mec.num_pipe_per_mec *
				     adev->gfx.mec.num_queue_per_pipe,
				     adev->gfx.num_compute_rings);

	if (multipipe_policy) {
		/* policy: spread queues evenly across all pipes on MEC1 only */
		for (i = 0; i < max_queues_per_mec; i++) {
			pipe = i % adev->gfx.mec.num_pipe_per_mec;
			queue = (i / adev->gfx.mec.num_pipe_per_mec) %
				adev->gfx.mec.num_queue_per_pipe;

			set_bit(pipe * adev->gfx.mec.num_queue_per_pipe + queue,
					adev->gfx.mec.queue_bitmap);
		}
	} else {
		/* policy: amdgpu owns all queues in the given pipe */
		for (i = 0; i < max_queues_per_mec; ++i)
			set_bit(i, adev->gfx.mec.queue_bitmap);
	}

	dev_dbg(adev->dev, "mec queue bitmap weight=%d\n", bitmap_weight(adev->gfx.mec.queue_bitmap, AMDGPU_MAX_COMPUTE_QUEUES));
}

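/**
 * amdgpu_gfx_graphics_queue_acquire - Select the graphics (ME) queues amdgpu will own
 *
 * @adev: amdgpu_device pointer
 *
 * Populates adev->gfx.me.queue_bitmap and updates the number of active
 * graphics rings accordingly.
 */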
void amdgpu_gfx_graphics_queue_acquire(struct amdgpu_device *adev)
{
	int i, queue, me;

	for (i = 0; i < AMDGPU_MAX_GFX_QUEUES; ++i) {
		queue = i % adev->gfx.me.num_queue_per_pipe;
		me = (i / adev->gfx.me.num_queue_per_pipe)
		      / adev->gfx.me.num_pipe_per_me;

		if (me >= adev->gfx.me.num_me)
			break;
		/* policy: amdgpu owns the first queue per pipe at this stage;
		 * this will be extended to multiple queues per pipe later */
		if (me == 0 && queue < 1)
			set_bit(i, adev->gfx.me.queue_bitmap);
	}

	/* update the number of active graphics rings */
	adev->gfx.num_gfx_rings =
		bitmap_weight(adev->gfx.me.queue_bitmap, AMDGPU_MAX_GFX_QUEUES);
}

static int amdgpu_gfx_kiq_acquire(struct amdgpu_device *adev,
				  struct amdgpu_ring *ring)
{
	int queue_bit;
	int mec, pipe, queue;

	queue_bit = adev->gfx.mec.num_mec
		    * adev->gfx.mec.num_pipe_per_mec
		    * adev->gfx.mec.num_queue_per_pipe;

	while (--queue_bit >= 0) {
		if (test_bit(queue_bit, adev->gfx.mec.queue_bitmap))
			continue;

		amdgpu_queue_mask_bit_to_mec_queue(adev, queue_bit, &mec, &pipe, &queue);

		/*
		 * 1. Using pipes 2/3 from MEC 2 seems to cause problems.
		 * 2. It must use queue id 0, because CGPG_IDLE/SAVE/LOAD/RUN
		 * can only be issued on queue 0.
		 */
		if ((mec == 1 && pipe > 1) || queue != 0)
			continue;

		ring->me = mec + 1;
		ring->pipe = pipe;
		ring->queue = queue;

		return 0;
	}

	dev_err(adev->dev, "Failed to find a queue for KIQ\n");
	return -EINVAL;
}

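/**
 * amdgpu_gfx_kiq_init_ring - Initialize the KIQ (Kernel Interface Queue) ring
 *
 * @adev: amdgpu_device pointer
 * @ring: KIQ ring to initialize
 * @irq: interrupt source used by the ring
 *
 * Acquires a free compute queue for the KIQ and initializes the ring on it.
 * Returns 0 on success, negative error code on failure.
 */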
int amdgpu_gfx_kiq_init_ring(struct amdgpu_device *adev,
			     struct amdgpu_ring *ring,
			     struct amdgpu_irq_src *irq)
{
	struct amdgpu_kiq *kiq = &adev->gfx.kiq;
	int r = 0;

	spin_lock_init(&kiq->ring_lock);

	ring->adev = NULL;
	ring->ring_obj = NULL;
	ring->use_doorbell = true;
	ring->doorbell_index = adev->doorbell_index.kiq;

	r = amdgpu_gfx_kiq_acquire(adev, ring);
	if (r)
		return r;

	ring->eop_gpu_addr = kiq->eop_gpu_addr;
	ring->no_scheduler = true;
	sprintf(ring->name, "kiq_%d.%d.%d", ring->me, ring->pipe, ring->queue);
	r = amdgpu_ring_init(adev, ring, 1024,
			     irq, AMDGPU_CP_KIQ_IRQ_DRIVER0,
			     AMDGPU_RING_PRIO_DEFAULT);
	if (r)
		dev_warn(adev->dev, "(%d) failed to init kiq ring\n", r);

	return r;
}

void amdgpu_gfx_kiq_free_ring(struct amdgpu_ring *ring)
{
	amdgpu_ring_fini(ring);
}

void amdgpu_gfx_kiq_fini(struct amdgpu_device *adev)
{
	struct amdgpu_kiq *kiq = &adev->gfx.kiq;

	amdgpu_bo_free_kernel(&kiq->eop_obj, &kiq->eop_gpu_addr, NULL);
}

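/**
 * amdgpu_gfx_kiq_init - Allocate and clear the KIQ EOP (end-of-pipe) buffer
 *
 * @adev: amdgpu_device pointer
 * @hpd_size: size of the EOP buffer in bytes
 *
 * Returns 0 on success, negative error code on failure.
 */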
int amdgpu_gfx_kiq_init(struct amdgpu_device *adev,
			unsigned hpd_size)
{
	int r;
	u32 *hpd;
	struct amdgpu_kiq *kiq = &adev->gfx.kiq;

	r = amdgpu_bo_create_kernel(adev, hpd_size, PAGE_SIZE,
				    AMDGPU_GEM_DOMAIN_GTT, &kiq->eop_obj,
				    &kiq->eop_gpu_addr, (void **)&hpd);
	if (r) {
		dev_warn(adev->dev, "failed to create KIQ bo (%d).\n", r);
		return r;
	}

	memset(hpd, 0, hpd_size);

	r = amdgpu_bo_reserve(kiq->eop_obj, true);
	if (unlikely(r != 0))
		dev_warn(adev->dev, "(%d) reserve kiq eop bo failed\n", r);
	amdgpu_bo_kunmap(kiq->eop_obj);
	amdgpu_bo_unreserve(kiq->eop_obj);

	return 0;
}

/* create MQD for each compute/gfx queue */
int amdgpu_gfx_mqd_sw_init(struct amdgpu_device *adev,
			   unsigned mqd_size)
{
	struct amdgpu_ring *ring = NULL;
	int r, i;

	/* create MQD for KIQ */
	ring = &adev->gfx.kiq.ring;
	if (!ring->mqd_obj) {
		/* Originally the KIQ MQD was placed in the GTT domain, but for
		 * SRIOV the VRAM domain is a must; otherwise the hypervisor's
		 * SAVE_VF fails after the driver is unloaded, since by then the
		 * MQD has been deallocated and unbound from the GART. To avoid
		 * that divergence, use the VRAM domain for the KIQ MQD on both
		 * SRIOV and bare-metal.
		 */
		r = amdgpu_bo_create_kernel(adev, mqd_size, PAGE_SIZE,
					    AMDGPU_GEM_DOMAIN_VRAM, &ring->mqd_obj,
					    &ring->mqd_gpu_addr, &ring->mqd_ptr);
		if (r) {
			dev_warn(adev->dev, "failed to create ring mqd bo (%d)", r);
			return r;
		}

		/* prepare MQD backup */
		adev->gfx.mec.mqd_backup[AMDGPU_MAX_COMPUTE_RINGS] = kmalloc(mqd_size, GFP_KERNEL);
		if (!adev->gfx.mec.mqd_backup[AMDGPU_MAX_COMPUTE_RINGS])
			dev_warn(adev->dev, "no memory to create MQD backup for ring %s\n", ring->name);
	}

	if (adev->asic_type >= CHIP_NAVI10 && amdgpu_async_gfx_ring) {
		/* create MQD for each KGQ */
		for (i = 0; i < adev->gfx.num_gfx_rings; i++) {
			ring = &adev->gfx.gfx_ring[i];
			if (!ring->mqd_obj) {
				r = amdgpu_bo_create_kernel(adev, mqd_size, PAGE_SIZE,
							    AMDGPU_GEM_DOMAIN_GTT, &ring->mqd_obj,
							    &ring->mqd_gpu_addr, &ring->mqd_ptr);
				if (r) {
					dev_warn(adev->dev, "failed to create ring mqd bo (%d)", r);
					return r;
				}

				/* prepare MQD backup */
				adev->gfx.me.mqd_backup[i] = kmalloc(mqd_size, GFP_KERNEL);
				if (!adev->gfx.me.mqd_backup[i])
					dev_warn(adev->dev, "no memory to create MQD backup for ring %s\n", ring->name);
			}
		}
	}

	/* create MQD for each KCQ */
	for (i = 0; i < adev->gfx.num_compute_rings; i++) {
		ring = &adev->gfx.compute_ring[i];
		if (!ring->mqd_obj) {
			r = amdgpu_bo_create_kernel(adev, mqd_size, PAGE_SIZE,
						    AMDGPU_GEM_DOMAIN_GTT, &ring->mqd_obj,
						    &ring->mqd_gpu_addr, &ring->mqd_ptr);
			if (r) {
				dev_warn(adev->dev, "failed to create ring mqd bo (%d)", r);
				return r;
			}

			/* prepare MQD backup */
			adev->gfx.mec.mqd_backup[i] = kmalloc(mqd_size, GFP_KERNEL);
			if (!adev->gfx.mec.mqd_backup[i])
				dev_warn(adev->dev, "no memory to create MQD backup for ring %s\n", ring->name);
		}
	}

	return 0;
}

void amdgpu_gfx_mqd_sw_fini(struct amdgpu_device *adev)
{
	struct amdgpu_ring *ring = NULL;
	int i;

	if (adev->asic_type >= CHIP_NAVI10 && amdgpu_async_gfx_ring) {
		for (i = 0; i < adev->gfx.num_gfx_rings; i++) {
			ring = &adev->gfx.gfx_ring[i];
			kfree(adev->gfx.me.mqd_backup[i]);
			amdgpu_bo_free_kernel(&ring->mqd_obj,
					      &ring->mqd_gpu_addr,
					      &ring->mqd_ptr);
		}
	}

	for (i = 0; i < adev->gfx.num_compute_rings; i++) {
		ring = &adev->gfx.compute_ring[i];
		kfree(adev->gfx.mec.mqd_backup[i]);
		amdgpu_bo_free_kernel(&ring->mqd_obj,
				      &ring->mqd_gpu_addr,
				      &ring->mqd_ptr);
	}

	ring = &adev->gfx.kiq.ring;
	kfree(adev->gfx.mec.mqd_backup[AMDGPU_MAX_COMPUTE_RINGS]);
	amdgpu_bo_free_kernel(&ring->mqd_obj,
			      &ring->mqd_gpu_addr,
			      &ring->mqd_ptr);
}

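/**
 * amdgpu_gfx_disable_kcq - Unmap all kernel compute queues (KCQs) via the KIQ
 *
 * @adev: amdgpu_device pointer
 *
 * Returns 0 on success, negative error code on failure.
 */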
int amdgpu_gfx_disable_kcq(struct amdgpu_device *adev)
{
	struct amdgpu_kiq *kiq = &adev->gfx.kiq;
	struct amdgpu_ring *kiq_ring = &kiq->ring;
	int i;

	if (!kiq->pmf || !kiq->pmf->kiq_unmap_queues)
		return -EINVAL;

	if (amdgpu_ring_alloc(kiq_ring, kiq->pmf->unmap_queues_size *
					adev->gfx.num_compute_rings))
		return -ENOMEM;

	for (i = 0; i < adev->gfx.num_compute_rings; i++)
		kiq->pmf->kiq_unmap_queues(kiq_ring, &adev->gfx.compute_ring[i],
					   RESET_QUEUES, 0, 0);

	return amdgpu_ring_test_helper(kiq_ring);
}

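/**
 * amdgpu_queue_mask_bit_to_set_resource_bit - Map a driver queue bit to a SET_RESOURCES bit
 *
 * @adev: amdgpu_device pointer
 * @queue_bit: position in the driver's MEC queue bitmap
 *
 * The SET_RESOURCES queue mask uses a fixed layout of 4 pipes x 8 queues
 * per MEC, which may differ from the driver's bitmap layout.
 */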
int amdgpu_queue_mask_bit_to_set_resource_bit(struct amdgpu_device *adev,
					int queue_bit)
{
	int mec, pipe, queue;
	int set_resource_bit = 0;

	amdgpu_queue_mask_bit_to_mec_queue(adev, queue_bit, &mec, &pipe, &queue);

	set_resource_bit = mec * 4 * 8 + pipe * 8 + queue;

	return set_resource_bit;
}

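/**
 * amdgpu_gfx_enable_kcq - Map all kernel compute queues (KCQs) via the KIQ
 *
 * @adev: amdgpu_device pointer
 *
 * Builds the queue mask from the MEC queue bitmap, issues SET_RESOURCES and
 * maps each compute ring. Returns 0 on success, negative error code on failure.
 */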
int amdgpu_gfx_enable_kcq(struct amdgpu_device *adev)
{
	struct amdgpu_kiq *kiq = &adev->gfx.kiq;
	struct amdgpu_ring *kiq_ring = &adev->gfx.kiq.ring;
	uint64_t queue_mask = 0;
	int r, i;

	if (!kiq->pmf || !kiq->pmf->kiq_map_queues || !kiq->pmf->kiq_set_resources)
		return -EINVAL;

	for (i = 0; i < AMDGPU_MAX_COMPUTE_QUEUES; ++i) {
		if (!test_bit(i, adev->gfx.mec.queue_bitmap))
			continue;

		/* This situation may be hit in the future if a new HW
		 * generation exposes more than 64 queues. If so, the
		 * definition of queue_mask needs updating */
		if (WARN_ON(i >= (sizeof(queue_mask)*8))) {
			DRM_ERROR("Invalid KCQ enabled: %d\n", i);
			break;
		}

		queue_mask |= (1ull << amdgpu_queue_mask_bit_to_set_resource_bit(adev, i));
	}

	DRM_INFO("kiq ring mec %d pipe %d q %d\n", kiq_ring->me, kiq_ring->pipe,
							kiq_ring->queue);

	r = amdgpu_ring_alloc(kiq_ring, kiq->pmf->map_queues_size *
					adev->gfx.num_compute_rings +
					kiq->pmf->set_resources_size);
	if (r) {
		DRM_ERROR("Failed to lock KIQ (%d).\n", r);
		return r;
	}

	kiq->pmf->kiq_set_resources(kiq_ring, queue_mask);
	for (i = 0; i < adev->gfx.num_compute_rings; i++)
		kiq->pmf->kiq_map_queues(kiq_ring, &adev->gfx.compute_ring[i]);

	r = amdgpu_ring_test_helper(kiq_ring);
	if (r)
		DRM_ERROR("KCQ enable failed\n");

	return r;
}

/**
 * amdgpu_gfx_off_ctrl - Handle GFXOFF feature enable/disable requests
 *
 * @adev: amdgpu_device pointer
 * @enable: true to allow the GFXOFF feature, false to disable it
 *
 * 1. The GFXOFF feature is enabled by the GFX IP after GFX CG/PG is enabled.
 * 2. Other clients can send a request to disable the GFXOFF feature; the
 *    request should be honored.
 * 3. Other clients can cancel their request to disable the GFXOFF feature.
 * 4. Other clients should not request to enable the GFXOFF feature without
 *    having previously requested to disable it.
 */
void amdgpu_gfx_off_ctrl(struct amdgpu_device *adev, bool enable)
{
	if (!(adev->pm.pp_feature & PP_GFXOFF_MASK))
		return;

	mutex_lock(&adev->gfx.gfx_off_mutex);

	if (enable) {
		/* If the count is already 0, it means there's an imbalance bug somewhere.
		 * Note that the bug may be in a different caller than the one which triggers the
		 * WARN_ON_ONCE.
		 */
		if (WARN_ON_ONCE(adev->gfx.gfx_off_req_count == 0))
			goto unlock;

		adev->gfx.gfx_off_req_count--;

		if (adev->gfx.gfx_off_req_count == 0 && !adev->gfx.gfx_off_state)
			schedule_delayed_work(&adev->gfx.gfx_off_delay_work, GFX_OFF_DELAY_ENABLE);
	} else {
		if (adev->gfx.gfx_off_req_count == 0) {
			cancel_delayed_work_sync(&adev->gfx.gfx_off_delay_work);

			if (adev->gfx.gfx_off_state &&
			    !amdgpu_dpm_set_powergating_by_smu(adev, AMD_IP_BLOCK_TYPE_GFX, false)) {
				adev->gfx.gfx_off_state = false;

				if (adev->gfx.funcs->init_spm_golden) {
					dev_dbg(adev->dev,
						"GFXOFF is disabled, re-init SPM golden settings\n");
					amdgpu_gfx_init_spm_golden(adev);
				}
			}
		}

		adev->gfx.gfx_off_req_count++;
	}

unlock:
	mutex_unlock(&adev->gfx.gfx_off_mutex);
}

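/**
 * amdgpu_get_gfx_off_status - Query the current GFXOFF status from the SMU
 *
 * @adev: amdgpu_device pointer
 * @value: pointer in which to store the status
 *
 * Returns 0 on success, negative error code on failure.
 */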
int amdgpu_get_gfx_off_status(struct amdgpu_device *adev, uint32_t *value)
{
	int r = 0;

	mutex_lock(&adev->gfx.gfx_off_mutex);

	r = smu_get_status_gfxoff(adev, value);

	mutex_unlock(&adev->gfx.gfx_off_mutex);

	return r;
}

int amdgpu_gfx_ras_late_init(struct amdgpu_device *adev)
{
	int r;
	struct ras_fs_if fs_info = {
		.sysfs_name = "gfx_err_count",
	};
	struct ras_ih_if ih_info = {
		.cb = amdgpu_gfx_process_ras_data_cb,
	};

	if (!adev->gfx.ras_if) {
		adev->gfx.ras_if = kmalloc(sizeof(struct ras_common_if), GFP_KERNEL);
		if (!adev->gfx.ras_if)
			return -ENOMEM;
		adev->gfx.ras_if->block = AMDGPU_RAS_BLOCK__GFX;
		adev->gfx.ras_if->type = AMDGPU_RAS_ERROR__MULTI_UNCORRECTABLE;
		adev->gfx.ras_if->sub_block_index = 0;
		strcpy(adev->gfx.ras_if->name, "gfx");
	}
	fs_info.head = ih_info.head = *adev->gfx.ras_if;

	r = amdgpu_ras_late_init(adev, adev->gfx.ras_if,
				 &fs_info, &ih_info);
	if (r)
		goto free;

	if (amdgpu_ras_is_supported(adev, adev->gfx.ras_if->block)) {
		r = amdgpu_irq_get(adev, &adev->gfx.cp_ecc_error_irq, 0);
		if (r)
			goto late_fini;
	} else {
		/* free gfx ras_if if ras is not supported */
		r = 0;
		goto free;
	}

	return 0;
late_fini:
	amdgpu_ras_late_fini(adev, adev->gfx.ras_if, &ih_info);
free:
	kfree(adev->gfx.ras_if);
	adev->gfx.ras_if = NULL;
	return r;
}

void amdgpu_gfx_ras_fini(struct amdgpu_device *adev)
{
	if (amdgpu_ras_is_supported(adev, AMDGPU_RAS_BLOCK__GFX) &&
			adev->gfx.ras_if) {
		struct ras_common_if *ras_if = adev->gfx.ras_if;
		struct ras_ih_if ih_info = {
			.head = *ras_if,
			.cb = amdgpu_gfx_process_ras_data_cb,
		};

		amdgpu_ras_late_fini(adev, ras_if, &ih_info);
		kfree(ras_if);
	}
}

int amdgpu_gfx_process_ras_data_cb(struct amdgpu_device *adev,
		void *err_data,
		struct amdgpu_iv_entry *entry)
{
	/* TODO: a UE will trigger an interrupt.
	 *
	 * When "Full RAS" is enabled, the per-IP interrupt sources should
	 * be disabled and the driver should only look for the aggregated
	 * interrupt via sync flood.
	 */
	if (!amdgpu_ras_is_supported(adev, AMDGPU_RAS_BLOCK__GFX)) {
		kgd2kfd_set_sram_ecc_flag(adev->kfd.dev);
		if (adev->gfx.funcs->query_ras_error_count)
			adev->gfx.funcs->query_ras_error_count(adev, err_data);
		amdgpu_ras_reset_gpu(adev);
	}
	return AMDGPU_RAS_SUCCESS;
}

int amdgpu_gfx_cp_ecc_error_irq(struct amdgpu_device *adev,
				  struct amdgpu_irq_src *source,
				  struct amdgpu_iv_entry *entry)
{
	struct ras_common_if *ras_if = adev->gfx.ras_if;
	struct ras_dispatch_if ih_data = {
		.entry = entry,
	};

	if (!ras_if)
		return 0;

	ih_data.head = *ras_if;

	DRM_ERROR("CP ECC ERROR IRQ\n");
	amdgpu_ras_interrupt_dispatch(adev, &ih_data);
	return 0;
}

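/**
 * amdgpu_kiq_rreg - Read an MMIO register through the KIQ
 *
 * @adev: amdgpu_device pointer
 * @reg: register dword offset to read
 *
 * Emits a register-read packet on the KIQ ring and polls for the result in a
 * writeback slot, for cases (e.g. SRIOV) where direct MMIO access is not
 * possible. Returns the register value, or ~0 on failure.
 */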
uint32_t amdgpu_kiq_rreg(struct amdgpu_device *adev, uint32_t reg)
{
	signed long r, cnt = 0;
	unsigned long flags;
	uint32_t seq, reg_val_offs = 0, value = 0;
	struct amdgpu_kiq *kiq = &adev->gfx.kiq;
	struct amdgpu_ring *ring = &kiq->ring;

	if (adev->in_pci_err_recovery)
		return 0;

	BUG_ON(!ring->funcs->emit_rreg);

	spin_lock_irqsave(&kiq->ring_lock, flags);
	if (amdgpu_device_wb_get(adev, &reg_val_offs)) {
		pr_err("critical bug! too many kiq readers\n");
		goto failed_unlock;
	}
	amdgpu_ring_alloc(ring, 32);
	amdgpu_ring_emit_rreg(ring, reg, reg_val_offs);
	r = amdgpu_fence_emit_polling(ring, &seq, MAX_KIQ_REG_WAIT);
	if (r)
		goto failed_undo;

	amdgpu_ring_commit(ring);
	spin_unlock_irqrestore(&kiq->ring_lock, flags);

	r = amdgpu_fence_wait_polling(ring, seq, MAX_KIQ_REG_WAIT);

	/* Don't wait anymore in the GPU reset case, because waiting here may
	 * block the gpu_recover() routine forever. E.g. this virt_kiq_rreg
	 * can be triggered from TTM, and ttm_bo_lock_delayed_workqueue() will
	 * never return if we keep waiting in virt_kiq_rreg, which causes
	 * gpu_recover() to hang there.
	 *
	 * Also don't wait anymore when called from IRQ context.
	 */
	if (r < 1 && (amdgpu_in_reset(adev) || in_interrupt()))
		goto failed_kiq_read;

	might_sleep();
	while (r < 1 && cnt++ < MAX_KIQ_REG_TRY) {
		msleep(MAX_KIQ_REG_BAILOUT_INTERVAL);
		r = amdgpu_fence_wait_polling(ring, seq, MAX_KIQ_REG_WAIT);
	}

	if (cnt > MAX_KIQ_REG_TRY)
		goto failed_kiq_read;

	mb();
	value = adev->wb.wb[reg_val_offs];
	amdgpu_device_wb_free(adev, reg_val_offs);
	return value;

failed_undo:
	amdgpu_ring_undo(ring);
failed_unlock:
	spin_unlock_irqrestore(&kiq->ring_lock, flags);
failed_kiq_read:
	if (reg_val_offs)
		amdgpu_device_wb_free(adev, reg_val_offs);
	dev_err(adev->dev, "failed to read reg:%x\n", reg);
	return ~0;
}

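/**
 * amdgpu_kiq_wreg - Write an MMIO register through the KIQ
 *
 * @adev: amdgpu_device pointer
 * @reg: register dword offset to write
 * @v: value to write
 *
 * Emits a register-write packet on the KIQ ring and polls the fence for
 * completion. On failure the write is dropped and an error is logged.
 */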
void amdgpu_kiq_wreg(struct amdgpu_device *adev, uint32_t reg, uint32_t v)
{
	signed long r, cnt = 0;
	unsigned long flags;
	uint32_t seq;
	struct amdgpu_kiq *kiq = &adev->gfx.kiq;
	struct amdgpu_ring *ring = &kiq->ring;

	BUG_ON(!ring->funcs->emit_wreg);

	if (adev->in_pci_err_recovery)
		return;

	spin_lock_irqsave(&kiq->ring_lock, flags);
	amdgpu_ring_alloc(ring, 32);
	amdgpu_ring_emit_wreg(ring, reg, v);
	r = amdgpu_fence_emit_polling(ring, &seq, MAX_KIQ_REG_WAIT);
	if (r)
		goto failed_undo;

	amdgpu_ring_commit(ring);
	spin_unlock_irqrestore(&kiq->ring_lock, flags);

	r = amdgpu_fence_wait_polling(ring, seq, MAX_KIQ_REG_WAIT);

	/* Don't wait anymore in the GPU reset case, because waiting here may
	 * block the gpu_recover() routine forever. E.g. this virt_kiq_rreg
	 * can be triggered from TTM, and ttm_bo_lock_delayed_workqueue() will
	 * never return if we keep waiting in virt_kiq_rreg, which causes
	 * gpu_recover() to hang there.
	 *
	 * Also don't wait anymore when called from IRQ context.
	 */
	if (r < 1 && (amdgpu_in_reset(adev) || in_interrupt()))
		goto failed_kiq_write;

	might_sleep();
	while (r < 1 && cnt++ < MAX_KIQ_REG_TRY) {
		msleep(MAX_KIQ_REG_BAILOUT_INTERVAL);
		r = amdgpu_fence_wait_polling(ring, seq, MAX_KIQ_REG_WAIT);
	}

	if (cnt > MAX_KIQ_REG_TRY)
		goto failed_kiq_write;

	return;

failed_undo:
	amdgpu_ring_undo(ring);
	spin_unlock_irqrestore(&kiq->ring_lock, flags);
failed_kiq_write:
	dev_err(adev->dev, "failed to write reg:%x\n", reg);
}