18c2ecf20Sopenharmony_ci/*
28c2ecf20Sopenharmony_ci * Copyright 2013 Advanced Micro Devices, Inc.
38c2ecf20Sopenharmony_ci *
48c2ecf20Sopenharmony_ci * Permission is hereby granted, free of charge, to any person obtaining a
58c2ecf20Sopenharmony_ci * copy of this software and associated documentation files (the "Software"),
68c2ecf20Sopenharmony_ci * to deal in the Software without restriction, including without limitation
78c2ecf20Sopenharmony_ci * the rights to use, copy, modify, merge, publish, distribute, sublicense,
88c2ecf20Sopenharmony_ci * and/or sell copies of the Software, and to permit persons to whom the
98c2ecf20Sopenharmony_ci * Software is furnished to do so, subject to the following conditions:
108c2ecf20Sopenharmony_ci *
118c2ecf20Sopenharmony_ci * The above copyright notice and this permission notice shall be included in
128c2ecf20Sopenharmony_ci * all copies or substantial portions of the Software.
138c2ecf20Sopenharmony_ci *
148c2ecf20Sopenharmony_ci * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
158c2ecf20Sopenharmony_ci * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
168c2ecf20Sopenharmony_ci * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
178c2ecf20Sopenharmony_ci * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
188c2ecf20Sopenharmony_ci * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
198c2ecf20Sopenharmony_ci * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
208c2ecf20Sopenharmony_ci * OTHER DEALINGS IN THE SOFTWARE.
218c2ecf20Sopenharmony_ci *
228c2ecf20Sopenharmony_ci * Authors: Alex Deucher
238c2ecf20Sopenharmony_ci */
248c2ecf20Sopenharmony_ci#include <linux/firmware.h>
258c2ecf20Sopenharmony_ci
268c2ecf20Sopenharmony_ci#include "radeon.h"
278c2ecf20Sopenharmony_ci#include "radeon_ucode.h"
288c2ecf20Sopenharmony_ci#include "radeon_asic.h"
298c2ecf20Sopenharmony_ci#include "radeon_trace.h"
308c2ecf20Sopenharmony_ci#include "cikd.h"
318c2ecf20Sopenharmony_ci
/* sdma */
/* Number of 32-bit words uploaded per engine from a legacy (headerless) ucode image. */
#define CIK_SDMA_UCODE_SIZE 1050
/* Value written to the ucode data register right after each image upload. */
#define CIK_SDMA_UCODE_VERSION 64

/*
 * NOTE(review): no definition or caller is visible in this part of the
 * file; presumably implemented elsewhere in the driver - confirm.
 */
u32 cik_gpu_check_soft_reset(struct radeon_device *rdev);
378c2ecf20Sopenharmony_ci
388c2ecf20Sopenharmony_ci/*
398c2ecf20Sopenharmony_ci * sDMA - System DMA
408c2ecf20Sopenharmony_ci * Starting with CIK, the GPU has new asynchronous
418c2ecf20Sopenharmony_ci * DMA engines.  These engines are used for compute
428c2ecf20Sopenharmony_ci * and gfx.  There are two DMA engines (SDMA0, SDMA1)
438c2ecf20Sopenharmony_ci * and each one supports 1 ring buffer used for gfx
448c2ecf20Sopenharmony_ci * and 2 queues used for compute.
458c2ecf20Sopenharmony_ci *
468c2ecf20Sopenharmony_ci * The programming model is very similar to the CP
 * (ring buffer, IBs, etc.), but sDMA has its own
488c2ecf20Sopenharmony_ci * packet format that is different from the PM4 format
498c2ecf20Sopenharmony_ci * used by the CP. sDMA supports copying data, writing
508c2ecf20Sopenharmony_ci * embedded data, solid fills, and a number of other
518c2ecf20Sopenharmony_ci * things.  It also has support for tiling/detiling of
528c2ecf20Sopenharmony_ci * buffers.
538c2ecf20Sopenharmony_ci */
548c2ecf20Sopenharmony_ci
558c2ecf20Sopenharmony_ci/**
568c2ecf20Sopenharmony_ci * cik_sdma_get_rptr - get the current read pointer
578c2ecf20Sopenharmony_ci *
588c2ecf20Sopenharmony_ci * @rdev: radeon_device pointer
598c2ecf20Sopenharmony_ci * @ring: radeon ring pointer
608c2ecf20Sopenharmony_ci *
618c2ecf20Sopenharmony_ci * Get the current rptr from the hardware (CIK+).
628c2ecf20Sopenharmony_ci */
638c2ecf20Sopenharmony_ciuint32_t cik_sdma_get_rptr(struct radeon_device *rdev,
648c2ecf20Sopenharmony_ci			   struct radeon_ring *ring)
658c2ecf20Sopenharmony_ci{
668c2ecf20Sopenharmony_ci	u32 rptr, reg;
678c2ecf20Sopenharmony_ci
688c2ecf20Sopenharmony_ci	if (rdev->wb.enabled) {
698c2ecf20Sopenharmony_ci		rptr = rdev->wb.wb[ring->rptr_offs/4];
708c2ecf20Sopenharmony_ci	} else {
718c2ecf20Sopenharmony_ci		if (ring->idx == R600_RING_TYPE_DMA_INDEX)
728c2ecf20Sopenharmony_ci			reg = SDMA0_GFX_RB_RPTR + SDMA0_REGISTER_OFFSET;
738c2ecf20Sopenharmony_ci		else
748c2ecf20Sopenharmony_ci			reg = SDMA0_GFX_RB_RPTR + SDMA1_REGISTER_OFFSET;
758c2ecf20Sopenharmony_ci
768c2ecf20Sopenharmony_ci		rptr = RREG32(reg);
778c2ecf20Sopenharmony_ci	}
788c2ecf20Sopenharmony_ci
798c2ecf20Sopenharmony_ci	return (rptr & 0x3fffc) >> 2;
808c2ecf20Sopenharmony_ci}
818c2ecf20Sopenharmony_ci
828c2ecf20Sopenharmony_ci/**
838c2ecf20Sopenharmony_ci * cik_sdma_get_wptr - get the current write pointer
848c2ecf20Sopenharmony_ci *
858c2ecf20Sopenharmony_ci * @rdev: radeon_device pointer
868c2ecf20Sopenharmony_ci * @ring: radeon ring pointer
878c2ecf20Sopenharmony_ci *
888c2ecf20Sopenharmony_ci * Get the current wptr from the hardware (CIK+).
898c2ecf20Sopenharmony_ci */
908c2ecf20Sopenharmony_ciuint32_t cik_sdma_get_wptr(struct radeon_device *rdev,
918c2ecf20Sopenharmony_ci			   struct radeon_ring *ring)
928c2ecf20Sopenharmony_ci{
938c2ecf20Sopenharmony_ci	u32 reg;
948c2ecf20Sopenharmony_ci
958c2ecf20Sopenharmony_ci	if (ring->idx == R600_RING_TYPE_DMA_INDEX)
968c2ecf20Sopenharmony_ci		reg = SDMA0_GFX_RB_WPTR + SDMA0_REGISTER_OFFSET;
978c2ecf20Sopenharmony_ci	else
988c2ecf20Sopenharmony_ci		reg = SDMA0_GFX_RB_WPTR + SDMA1_REGISTER_OFFSET;
998c2ecf20Sopenharmony_ci
1008c2ecf20Sopenharmony_ci	return (RREG32(reg) & 0x3fffc) >> 2;
1018c2ecf20Sopenharmony_ci}
1028c2ecf20Sopenharmony_ci
1038c2ecf20Sopenharmony_ci/**
1048c2ecf20Sopenharmony_ci * cik_sdma_set_wptr - commit the write pointer
1058c2ecf20Sopenharmony_ci *
1068c2ecf20Sopenharmony_ci * @rdev: radeon_device pointer
1078c2ecf20Sopenharmony_ci * @ring: radeon ring pointer
1088c2ecf20Sopenharmony_ci *
1098c2ecf20Sopenharmony_ci * Write the wptr back to the hardware (CIK+).
1108c2ecf20Sopenharmony_ci */
1118c2ecf20Sopenharmony_civoid cik_sdma_set_wptr(struct radeon_device *rdev,
1128c2ecf20Sopenharmony_ci		       struct radeon_ring *ring)
1138c2ecf20Sopenharmony_ci{
1148c2ecf20Sopenharmony_ci	u32 reg;
1158c2ecf20Sopenharmony_ci
1168c2ecf20Sopenharmony_ci	if (ring->idx == R600_RING_TYPE_DMA_INDEX)
1178c2ecf20Sopenharmony_ci		reg = SDMA0_GFX_RB_WPTR + SDMA0_REGISTER_OFFSET;
1188c2ecf20Sopenharmony_ci	else
1198c2ecf20Sopenharmony_ci		reg = SDMA0_GFX_RB_WPTR + SDMA1_REGISTER_OFFSET;
1208c2ecf20Sopenharmony_ci
1218c2ecf20Sopenharmony_ci	WREG32(reg, (ring->wptr << 2) & 0x3fffc);
1228c2ecf20Sopenharmony_ci	(void)RREG32(reg);
1238c2ecf20Sopenharmony_ci}
1248c2ecf20Sopenharmony_ci
1258c2ecf20Sopenharmony_ci/**
1268c2ecf20Sopenharmony_ci * cik_sdma_ring_ib_execute - Schedule an IB on the DMA engine
1278c2ecf20Sopenharmony_ci *
1288c2ecf20Sopenharmony_ci * @rdev: radeon_device pointer
1298c2ecf20Sopenharmony_ci * @ib: IB object to schedule
1308c2ecf20Sopenharmony_ci *
1318c2ecf20Sopenharmony_ci * Schedule an IB in the DMA ring (CIK).
1328c2ecf20Sopenharmony_ci */
1338c2ecf20Sopenharmony_civoid cik_sdma_ring_ib_execute(struct radeon_device *rdev,
1348c2ecf20Sopenharmony_ci			      struct radeon_ib *ib)
1358c2ecf20Sopenharmony_ci{
1368c2ecf20Sopenharmony_ci	struct radeon_ring *ring = &rdev->ring[ib->ring];
1378c2ecf20Sopenharmony_ci	u32 extra_bits = (ib->vm ? ib->vm->ids[ib->ring].id : 0) & 0xf;
1388c2ecf20Sopenharmony_ci
1398c2ecf20Sopenharmony_ci	if (rdev->wb.enabled) {
1408c2ecf20Sopenharmony_ci		u32 next_rptr = ring->wptr + 5;
1418c2ecf20Sopenharmony_ci		while ((next_rptr & 7) != 4)
1428c2ecf20Sopenharmony_ci			next_rptr++;
1438c2ecf20Sopenharmony_ci		next_rptr += 4;
1448c2ecf20Sopenharmony_ci		radeon_ring_write(ring, SDMA_PACKET(SDMA_OPCODE_WRITE, SDMA_WRITE_SUB_OPCODE_LINEAR, 0));
1458c2ecf20Sopenharmony_ci		radeon_ring_write(ring, ring->next_rptr_gpu_addr & 0xfffffffc);
1468c2ecf20Sopenharmony_ci		radeon_ring_write(ring, upper_32_bits(ring->next_rptr_gpu_addr));
1478c2ecf20Sopenharmony_ci		radeon_ring_write(ring, 1); /* number of DWs to follow */
1488c2ecf20Sopenharmony_ci		radeon_ring_write(ring, next_rptr);
1498c2ecf20Sopenharmony_ci	}
1508c2ecf20Sopenharmony_ci
1518c2ecf20Sopenharmony_ci	/* IB packet must end on a 8 DW boundary */
1528c2ecf20Sopenharmony_ci	while ((ring->wptr & 7) != 4)
1538c2ecf20Sopenharmony_ci		radeon_ring_write(ring, SDMA_PACKET(SDMA_OPCODE_NOP, 0, 0));
1548c2ecf20Sopenharmony_ci	radeon_ring_write(ring, SDMA_PACKET(SDMA_OPCODE_INDIRECT_BUFFER, 0, extra_bits));
1558c2ecf20Sopenharmony_ci	radeon_ring_write(ring, ib->gpu_addr & 0xffffffe0); /* base must be 32 byte aligned */
1568c2ecf20Sopenharmony_ci	radeon_ring_write(ring, upper_32_bits(ib->gpu_addr));
1578c2ecf20Sopenharmony_ci	radeon_ring_write(ring, ib->length_dw);
1588c2ecf20Sopenharmony_ci
1598c2ecf20Sopenharmony_ci}
1608c2ecf20Sopenharmony_ci
1618c2ecf20Sopenharmony_ci/**
1628c2ecf20Sopenharmony_ci * cik_sdma_hdp_flush_ring_emit - emit an hdp flush on the DMA ring
1638c2ecf20Sopenharmony_ci *
1648c2ecf20Sopenharmony_ci * @rdev: radeon_device pointer
1658c2ecf20Sopenharmony_ci * @ridx: radeon ring index
1668c2ecf20Sopenharmony_ci *
1678c2ecf20Sopenharmony_ci * Emit an hdp flush packet on the requested DMA ring.
1688c2ecf20Sopenharmony_ci */
1698c2ecf20Sopenharmony_cistatic void cik_sdma_hdp_flush_ring_emit(struct radeon_device *rdev,
1708c2ecf20Sopenharmony_ci					 int ridx)
1718c2ecf20Sopenharmony_ci{
1728c2ecf20Sopenharmony_ci	struct radeon_ring *ring = &rdev->ring[ridx];
1738c2ecf20Sopenharmony_ci	u32 extra_bits = (SDMA_POLL_REG_MEM_EXTRA_OP(1) |
1748c2ecf20Sopenharmony_ci			  SDMA_POLL_REG_MEM_EXTRA_FUNC(3)); /* == */
1758c2ecf20Sopenharmony_ci	u32 ref_and_mask;
1768c2ecf20Sopenharmony_ci
1778c2ecf20Sopenharmony_ci	if (ridx == R600_RING_TYPE_DMA_INDEX)
1788c2ecf20Sopenharmony_ci		ref_and_mask = SDMA0;
1798c2ecf20Sopenharmony_ci	else
1808c2ecf20Sopenharmony_ci		ref_and_mask = SDMA1;
1818c2ecf20Sopenharmony_ci
1828c2ecf20Sopenharmony_ci	radeon_ring_write(ring, SDMA_PACKET(SDMA_OPCODE_POLL_REG_MEM, 0, extra_bits));
1838c2ecf20Sopenharmony_ci	radeon_ring_write(ring, GPU_HDP_FLUSH_DONE);
1848c2ecf20Sopenharmony_ci	radeon_ring_write(ring, GPU_HDP_FLUSH_REQ);
1858c2ecf20Sopenharmony_ci	radeon_ring_write(ring, ref_and_mask); /* reference */
1868c2ecf20Sopenharmony_ci	radeon_ring_write(ring, ref_and_mask); /* mask */
1878c2ecf20Sopenharmony_ci	radeon_ring_write(ring, (0xfff << 16) | 10); /* retry count, poll interval */
1888c2ecf20Sopenharmony_ci}
1898c2ecf20Sopenharmony_ci
1908c2ecf20Sopenharmony_ci/**
1918c2ecf20Sopenharmony_ci * cik_sdma_fence_ring_emit - emit a fence on the DMA ring
1928c2ecf20Sopenharmony_ci *
1938c2ecf20Sopenharmony_ci * @rdev: radeon_device pointer
1948c2ecf20Sopenharmony_ci * @fence: radeon fence object
1958c2ecf20Sopenharmony_ci *
1968c2ecf20Sopenharmony_ci * Add a DMA fence packet to the ring to write
1978c2ecf20Sopenharmony_ci * the fence seq number and DMA trap packet to generate
1988c2ecf20Sopenharmony_ci * an interrupt if needed (CIK).
1998c2ecf20Sopenharmony_ci */
2008c2ecf20Sopenharmony_civoid cik_sdma_fence_ring_emit(struct radeon_device *rdev,
2018c2ecf20Sopenharmony_ci			      struct radeon_fence *fence)
2028c2ecf20Sopenharmony_ci{
2038c2ecf20Sopenharmony_ci	struct radeon_ring *ring = &rdev->ring[fence->ring];
2048c2ecf20Sopenharmony_ci	u64 addr = rdev->fence_drv[fence->ring].gpu_addr;
2058c2ecf20Sopenharmony_ci
2068c2ecf20Sopenharmony_ci	/* write the fence */
2078c2ecf20Sopenharmony_ci	radeon_ring_write(ring, SDMA_PACKET(SDMA_OPCODE_FENCE, 0, 0));
2088c2ecf20Sopenharmony_ci	radeon_ring_write(ring, lower_32_bits(addr));
2098c2ecf20Sopenharmony_ci	radeon_ring_write(ring, upper_32_bits(addr));
2108c2ecf20Sopenharmony_ci	radeon_ring_write(ring, fence->seq);
2118c2ecf20Sopenharmony_ci	/* generate an interrupt */
2128c2ecf20Sopenharmony_ci	radeon_ring_write(ring, SDMA_PACKET(SDMA_OPCODE_TRAP, 0, 0));
2138c2ecf20Sopenharmony_ci	/* flush HDP */
2148c2ecf20Sopenharmony_ci	cik_sdma_hdp_flush_ring_emit(rdev, fence->ring);
2158c2ecf20Sopenharmony_ci}
2168c2ecf20Sopenharmony_ci
2178c2ecf20Sopenharmony_ci/**
2188c2ecf20Sopenharmony_ci * cik_sdma_semaphore_ring_emit - emit a semaphore on the dma ring
2198c2ecf20Sopenharmony_ci *
2208c2ecf20Sopenharmony_ci * @rdev: radeon_device pointer
2218c2ecf20Sopenharmony_ci * @ring: radeon_ring structure holding ring information
2228c2ecf20Sopenharmony_ci * @semaphore: radeon semaphore object
2238c2ecf20Sopenharmony_ci * @emit_wait: wait or signal semaphore
2248c2ecf20Sopenharmony_ci *
2258c2ecf20Sopenharmony_ci * Add a DMA semaphore packet to the ring wait on or signal
2268c2ecf20Sopenharmony_ci * other rings (CIK).
2278c2ecf20Sopenharmony_ci */
2288c2ecf20Sopenharmony_cibool cik_sdma_semaphore_ring_emit(struct radeon_device *rdev,
2298c2ecf20Sopenharmony_ci				  struct radeon_ring *ring,
2308c2ecf20Sopenharmony_ci				  struct radeon_semaphore *semaphore,
2318c2ecf20Sopenharmony_ci				  bool emit_wait)
2328c2ecf20Sopenharmony_ci{
2338c2ecf20Sopenharmony_ci	u64 addr = semaphore->gpu_addr;
2348c2ecf20Sopenharmony_ci	u32 extra_bits = emit_wait ? 0 : SDMA_SEMAPHORE_EXTRA_S;
2358c2ecf20Sopenharmony_ci
2368c2ecf20Sopenharmony_ci	radeon_ring_write(ring, SDMA_PACKET(SDMA_OPCODE_SEMAPHORE, 0, extra_bits));
2378c2ecf20Sopenharmony_ci	radeon_ring_write(ring, addr & 0xfffffff8);
2388c2ecf20Sopenharmony_ci	radeon_ring_write(ring, upper_32_bits(addr));
2398c2ecf20Sopenharmony_ci
2408c2ecf20Sopenharmony_ci	return true;
2418c2ecf20Sopenharmony_ci}
2428c2ecf20Sopenharmony_ci
2438c2ecf20Sopenharmony_ci/**
2448c2ecf20Sopenharmony_ci * cik_sdma_gfx_stop - stop the gfx async dma engines
2458c2ecf20Sopenharmony_ci *
2468c2ecf20Sopenharmony_ci * @rdev: radeon_device pointer
2478c2ecf20Sopenharmony_ci *
2488c2ecf20Sopenharmony_ci * Stop the gfx async dma ring buffers (CIK).
2498c2ecf20Sopenharmony_ci */
2508c2ecf20Sopenharmony_cistatic void cik_sdma_gfx_stop(struct radeon_device *rdev)
2518c2ecf20Sopenharmony_ci{
2528c2ecf20Sopenharmony_ci	u32 rb_cntl, reg_offset;
2538c2ecf20Sopenharmony_ci	int i;
2548c2ecf20Sopenharmony_ci
2558c2ecf20Sopenharmony_ci	if ((rdev->asic->copy.copy_ring_index == R600_RING_TYPE_DMA_INDEX) ||
2568c2ecf20Sopenharmony_ci	    (rdev->asic->copy.copy_ring_index == CAYMAN_RING_TYPE_DMA1_INDEX))
2578c2ecf20Sopenharmony_ci		radeon_ttm_set_active_vram_size(rdev, rdev->mc.visible_vram_size);
2588c2ecf20Sopenharmony_ci
2598c2ecf20Sopenharmony_ci	for (i = 0; i < 2; i++) {
2608c2ecf20Sopenharmony_ci		if (i == 0)
2618c2ecf20Sopenharmony_ci			reg_offset = SDMA0_REGISTER_OFFSET;
2628c2ecf20Sopenharmony_ci		else
2638c2ecf20Sopenharmony_ci			reg_offset = SDMA1_REGISTER_OFFSET;
2648c2ecf20Sopenharmony_ci		rb_cntl = RREG32(SDMA0_GFX_RB_CNTL + reg_offset);
2658c2ecf20Sopenharmony_ci		rb_cntl &= ~SDMA_RB_ENABLE;
2668c2ecf20Sopenharmony_ci		WREG32(SDMA0_GFX_RB_CNTL + reg_offset, rb_cntl);
2678c2ecf20Sopenharmony_ci		WREG32(SDMA0_GFX_IB_CNTL + reg_offset, 0);
2688c2ecf20Sopenharmony_ci	}
2698c2ecf20Sopenharmony_ci	rdev->ring[R600_RING_TYPE_DMA_INDEX].ready = false;
2708c2ecf20Sopenharmony_ci	rdev->ring[CAYMAN_RING_TYPE_DMA1_INDEX].ready = false;
2718c2ecf20Sopenharmony_ci
2728c2ecf20Sopenharmony_ci	/* FIXME use something else than big hammer but after few days can not
2738c2ecf20Sopenharmony_ci	 * seem to find good combination so reset SDMA blocks as it seems we
2748c2ecf20Sopenharmony_ci	 * do not shut them down properly. This fix hibernation and does not
2758c2ecf20Sopenharmony_ci	 * affect suspend to ram.
2768c2ecf20Sopenharmony_ci	 */
2778c2ecf20Sopenharmony_ci	WREG32(SRBM_SOFT_RESET, SOFT_RESET_SDMA | SOFT_RESET_SDMA1);
2788c2ecf20Sopenharmony_ci	(void)RREG32(SRBM_SOFT_RESET);
2798c2ecf20Sopenharmony_ci	udelay(50);
2808c2ecf20Sopenharmony_ci	WREG32(SRBM_SOFT_RESET, 0);
2818c2ecf20Sopenharmony_ci	(void)RREG32(SRBM_SOFT_RESET);
2828c2ecf20Sopenharmony_ci}
2838c2ecf20Sopenharmony_ci
/**
 * cik_sdma_rlc_stop - stop the compute async dma engines
 *
 * @rdev: radeon_device pointer
 *
 * Stop the compute async dma queues (CIK).
 * Not implemented yet: the body is empty, so calling this currently
 * has no effect.
 */
static void cik_sdma_rlc_stop(struct radeon_device *rdev)
{
	/* XXX todo */
}
2958c2ecf20Sopenharmony_ci
2968c2ecf20Sopenharmony_ci/**
2978c2ecf20Sopenharmony_ci * cik_sdma_ctx_switch_enable - enable/disable sdma engine preemption
2988c2ecf20Sopenharmony_ci *
2998c2ecf20Sopenharmony_ci * @rdev: radeon_device pointer
3008c2ecf20Sopenharmony_ci * @enable: enable/disable preemption.
3018c2ecf20Sopenharmony_ci *
3028c2ecf20Sopenharmony_ci * Halt or unhalt the async dma engines (CIK).
3038c2ecf20Sopenharmony_ci */
3048c2ecf20Sopenharmony_cistatic void cik_sdma_ctx_switch_enable(struct radeon_device *rdev, bool enable)
3058c2ecf20Sopenharmony_ci{
3068c2ecf20Sopenharmony_ci	uint32_t reg_offset, value;
3078c2ecf20Sopenharmony_ci	int i;
3088c2ecf20Sopenharmony_ci
3098c2ecf20Sopenharmony_ci	for (i = 0; i < 2; i++) {
3108c2ecf20Sopenharmony_ci		if (i == 0)
3118c2ecf20Sopenharmony_ci			reg_offset = SDMA0_REGISTER_OFFSET;
3128c2ecf20Sopenharmony_ci		else
3138c2ecf20Sopenharmony_ci			reg_offset = SDMA1_REGISTER_OFFSET;
3148c2ecf20Sopenharmony_ci		value = RREG32(SDMA0_CNTL + reg_offset);
3158c2ecf20Sopenharmony_ci		if (enable)
3168c2ecf20Sopenharmony_ci			value |= AUTO_CTXSW_ENABLE;
3178c2ecf20Sopenharmony_ci		else
3188c2ecf20Sopenharmony_ci			value &= ~AUTO_CTXSW_ENABLE;
3198c2ecf20Sopenharmony_ci		WREG32(SDMA0_CNTL + reg_offset, value);
3208c2ecf20Sopenharmony_ci	}
3218c2ecf20Sopenharmony_ci}
3228c2ecf20Sopenharmony_ci
3238c2ecf20Sopenharmony_ci/**
3248c2ecf20Sopenharmony_ci * cik_sdma_enable - stop the async dma engines
3258c2ecf20Sopenharmony_ci *
3268c2ecf20Sopenharmony_ci * @rdev: radeon_device pointer
3278c2ecf20Sopenharmony_ci * @enable: enable/disable the DMA MEs.
3288c2ecf20Sopenharmony_ci *
3298c2ecf20Sopenharmony_ci * Halt or unhalt the async dma engines (CIK).
3308c2ecf20Sopenharmony_ci */
3318c2ecf20Sopenharmony_civoid cik_sdma_enable(struct radeon_device *rdev, bool enable)
3328c2ecf20Sopenharmony_ci{
3338c2ecf20Sopenharmony_ci	u32 me_cntl, reg_offset;
3348c2ecf20Sopenharmony_ci	int i;
3358c2ecf20Sopenharmony_ci
3368c2ecf20Sopenharmony_ci	if (!enable) {
3378c2ecf20Sopenharmony_ci		cik_sdma_gfx_stop(rdev);
3388c2ecf20Sopenharmony_ci		cik_sdma_rlc_stop(rdev);
3398c2ecf20Sopenharmony_ci	}
3408c2ecf20Sopenharmony_ci
3418c2ecf20Sopenharmony_ci	for (i = 0; i < 2; i++) {
3428c2ecf20Sopenharmony_ci		if (i == 0)
3438c2ecf20Sopenharmony_ci			reg_offset = SDMA0_REGISTER_OFFSET;
3448c2ecf20Sopenharmony_ci		else
3458c2ecf20Sopenharmony_ci			reg_offset = SDMA1_REGISTER_OFFSET;
3468c2ecf20Sopenharmony_ci		me_cntl = RREG32(SDMA0_ME_CNTL + reg_offset);
3478c2ecf20Sopenharmony_ci		if (enable)
3488c2ecf20Sopenharmony_ci			me_cntl &= ~SDMA_HALT;
3498c2ecf20Sopenharmony_ci		else
3508c2ecf20Sopenharmony_ci			me_cntl |= SDMA_HALT;
3518c2ecf20Sopenharmony_ci		WREG32(SDMA0_ME_CNTL + reg_offset, me_cntl);
3528c2ecf20Sopenharmony_ci	}
3538c2ecf20Sopenharmony_ci
3548c2ecf20Sopenharmony_ci	cik_sdma_ctx_switch_enable(rdev, enable);
3558c2ecf20Sopenharmony_ci}
3568c2ecf20Sopenharmony_ci
/**
 * cik_sdma_gfx_resume - setup and start the async dma engines
 *
 * @rdev: radeon_device pointer
 *
 * Set up the gfx DMA ring buffers and enable them (CIK).
 * SDMA0 is programmed and ring-tested first, then SDMA1.  If a ring test
 * fails, that ring is marked not ready and the error is returned at
 * once: a later engine is not programmed and the active VRAM size is
 * left untouched.
 * Returns 0 for success, error for failure.
 */
static int cik_sdma_gfx_resume(struct radeon_device *rdev)
{
	struct radeon_ring *ring;
	u32 rb_cntl, ib_cntl;
	u32 rb_bufsz;
	u32 reg_offset, wb_offset;
	int i, r;

	for (i = 0; i < 2; i++) {
		/* pick the ring, register bank and writeback slot for this engine */
		if (i == 0) {
			ring = &rdev->ring[R600_RING_TYPE_DMA_INDEX];
			reg_offset = SDMA0_REGISTER_OFFSET;
			wb_offset = R600_WB_DMA_RPTR_OFFSET;
		} else {
			ring = &rdev->ring[CAYMAN_RING_TYPE_DMA1_INDEX];
			reg_offset = SDMA1_REGISTER_OFFSET;
			wb_offset = CAYMAN_WB_DMA1_RPTR_OFFSET;
		}

		/* zero both semaphore timer control registers */
		WREG32(SDMA0_SEM_INCOMPLETE_TIMER_CNTL + reg_offset, 0);
		WREG32(SDMA0_SEM_WAIT_FAIL_TIMER_CNTL + reg_offset, 0);

		/* Set ring buffer size in dwords */
		rb_bufsz = order_base_2(ring->ring_size / 4);
		rb_cntl = rb_bufsz << 1;
#ifdef __BIG_ENDIAN
		rb_cntl |= SDMA_RB_SWAP_ENABLE | SDMA_RPTR_WRITEBACK_SWAP_ENABLE;
#endif
		/* first write: SDMA_RB_ENABLE is not set yet */
		WREG32(SDMA0_GFX_RB_CNTL + reg_offset, rb_cntl);

		/* Initialize the ring buffer's read and write pointers */
		WREG32(SDMA0_GFX_RB_RPTR + reg_offset, 0);
		WREG32(SDMA0_GFX_RB_WPTR + reg_offset, 0);

		/* set the wb address whether it's enabled or not */
		WREG32(SDMA0_GFX_RB_RPTR_ADDR_HI + reg_offset,
		       upper_32_bits(rdev->wb.gpu_addr + wb_offset) & 0xFFFFFFFF);
		WREG32(SDMA0_GFX_RB_RPTR_ADDR_LO + reg_offset,
		       ((rdev->wb.gpu_addr + wb_offset) & 0xFFFFFFFC));

		/* only updates the local copy; written out with the enable below */
		if (rdev->wb.enabled)
			rb_cntl |= SDMA_RPTR_WRITEBACK_ENABLE;

		/* ring base: gpu_addr >> 8 goes to BASE, gpu_addr >> 40 to BASE_HI */
		WREG32(SDMA0_GFX_RB_BASE + reg_offset, ring->gpu_addr >> 8);
		WREG32(SDMA0_GFX_RB_BASE_HI + reg_offset, ring->gpu_addr >> 40);

		/* reset the software write pointer and mirror it to the hardware */
		ring->wptr = 0;
		WREG32(SDMA0_GFX_RB_WPTR + reg_offset, ring->wptr << 2);

		/* enable DMA RB */
		WREG32(SDMA0_GFX_RB_CNTL + reg_offset, rb_cntl | SDMA_RB_ENABLE);

		ib_cntl = SDMA_IB_ENABLE;
#ifdef __BIG_ENDIAN
		ib_cntl |= SDMA_IB_SWAP_ENABLE;
#endif
		/* enable DMA IBs */
		WREG32(SDMA0_GFX_IB_CNTL + reg_offset, ib_cntl);

		/* set ready before the ring test; cleared again if the test fails */
		ring->ready = true;

		r = radeon_ring_test(rdev, ring->idx, ring);
		if (r) {
			ring->ready = false;
			return r;
		}
	}

	/* if a DMA ring is the copy ring, set the active VRAM size to real_vram_size */
	if ((rdev->asic->copy.copy_ring_index == R600_RING_TYPE_DMA_INDEX) ||
	    (rdev->asic->copy.copy_ring_index == CAYMAN_RING_TYPE_DMA1_INDEX))
		radeon_ttm_set_active_vram_size(rdev, rdev->mc.real_vram_size);

	return 0;
}
4398c2ecf20Sopenharmony_ci
/**
 * cik_sdma_rlc_resume - setup and start the async dma engines
 *
 * @rdev: radeon_device pointer
 *
 * Set up the compute DMA queues and enable them (CIK).
 * Not implemented yet: currently programs nothing and always returns 0.
 * Returns 0 for success, error for failure.
 */
static int cik_sdma_rlc_resume(struct radeon_device *rdev)
{
	/* XXX todo */
	return 0;
}
4538c2ecf20Sopenharmony_ci
4548c2ecf20Sopenharmony_ci/**
4558c2ecf20Sopenharmony_ci * cik_sdma_load_microcode - load the sDMA ME ucode
4568c2ecf20Sopenharmony_ci *
4578c2ecf20Sopenharmony_ci * @rdev: radeon_device pointer
4588c2ecf20Sopenharmony_ci *
4598c2ecf20Sopenharmony_ci * Loads the sDMA0/1 ucode.
4608c2ecf20Sopenharmony_ci * Returns 0 for success, -EINVAL if the ucode is not available.
4618c2ecf20Sopenharmony_ci */
4628c2ecf20Sopenharmony_cistatic int cik_sdma_load_microcode(struct radeon_device *rdev)
4638c2ecf20Sopenharmony_ci{
4648c2ecf20Sopenharmony_ci	int i;
4658c2ecf20Sopenharmony_ci
4668c2ecf20Sopenharmony_ci	if (!rdev->sdma_fw)
4678c2ecf20Sopenharmony_ci		return -EINVAL;
4688c2ecf20Sopenharmony_ci
4698c2ecf20Sopenharmony_ci	/* halt the MEs */
4708c2ecf20Sopenharmony_ci	cik_sdma_enable(rdev, false);
4718c2ecf20Sopenharmony_ci
4728c2ecf20Sopenharmony_ci	if (rdev->new_fw) {
4738c2ecf20Sopenharmony_ci		const struct sdma_firmware_header_v1_0 *hdr =
4748c2ecf20Sopenharmony_ci			(const struct sdma_firmware_header_v1_0 *)rdev->sdma_fw->data;
4758c2ecf20Sopenharmony_ci		const __le32 *fw_data;
4768c2ecf20Sopenharmony_ci		u32 fw_size;
4778c2ecf20Sopenharmony_ci
4788c2ecf20Sopenharmony_ci		radeon_ucode_print_sdma_hdr(&hdr->header);
4798c2ecf20Sopenharmony_ci
4808c2ecf20Sopenharmony_ci		/* sdma0 */
4818c2ecf20Sopenharmony_ci		fw_data = (const __le32 *)
4828c2ecf20Sopenharmony_ci			(rdev->sdma_fw->data + le32_to_cpu(hdr->header.ucode_array_offset_bytes));
4838c2ecf20Sopenharmony_ci		fw_size = le32_to_cpu(hdr->header.ucode_size_bytes) / 4;
4848c2ecf20Sopenharmony_ci		WREG32(SDMA0_UCODE_ADDR + SDMA0_REGISTER_OFFSET, 0);
4858c2ecf20Sopenharmony_ci		for (i = 0; i < fw_size; i++)
4868c2ecf20Sopenharmony_ci			WREG32(SDMA0_UCODE_DATA + SDMA0_REGISTER_OFFSET, le32_to_cpup(fw_data++));
4878c2ecf20Sopenharmony_ci		WREG32(SDMA0_UCODE_DATA + SDMA0_REGISTER_OFFSET, CIK_SDMA_UCODE_VERSION);
4888c2ecf20Sopenharmony_ci
4898c2ecf20Sopenharmony_ci		/* sdma1 */
4908c2ecf20Sopenharmony_ci		fw_data = (const __le32 *)
4918c2ecf20Sopenharmony_ci			(rdev->sdma_fw->data + le32_to_cpu(hdr->header.ucode_array_offset_bytes));
4928c2ecf20Sopenharmony_ci		fw_size = le32_to_cpu(hdr->header.ucode_size_bytes) / 4;
4938c2ecf20Sopenharmony_ci		WREG32(SDMA0_UCODE_ADDR + SDMA1_REGISTER_OFFSET, 0);
4948c2ecf20Sopenharmony_ci		for (i = 0; i < fw_size; i++)
4958c2ecf20Sopenharmony_ci			WREG32(SDMA0_UCODE_DATA + SDMA1_REGISTER_OFFSET, le32_to_cpup(fw_data++));
4968c2ecf20Sopenharmony_ci		WREG32(SDMA0_UCODE_DATA + SDMA1_REGISTER_OFFSET, CIK_SDMA_UCODE_VERSION);
4978c2ecf20Sopenharmony_ci	} else {
4988c2ecf20Sopenharmony_ci		const __be32 *fw_data;
4998c2ecf20Sopenharmony_ci
5008c2ecf20Sopenharmony_ci		/* sdma0 */
5018c2ecf20Sopenharmony_ci		fw_data = (const __be32 *)rdev->sdma_fw->data;
5028c2ecf20Sopenharmony_ci		WREG32(SDMA0_UCODE_ADDR + SDMA0_REGISTER_OFFSET, 0);
5038c2ecf20Sopenharmony_ci		for (i = 0; i < CIK_SDMA_UCODE_SIZE; i++)
5048c2ecf20Sopenharmony_ci			WREG32(SDMA0_UCODE_DATA + SDMA0_REGISTER_OFFSET, be32_to_cpup(fw_data++));
5058c2ecf20Sopenharmony_ci		WREG32(SDMA0_UCODE_DATA + SDMA0_REGISTER_OFFSET, CIK_SDMA_UCODE_VERSION);
5068c2ecf20Sopenharmony_ci
5078c2ecf20Sopenharmony_ci		/* sdma1 */
5088c2ecf20Sopenharmony_ci		fw_data = (const __be32 *)rdev->sdma_fw->data;
5098c2ecf20Sopenharmony_ci		WREG32(SDMA0_UCODE_ADDR + SDMA1_REGISTER_OFFSET, 0);
5108c2ecf20Sopenharmony_ci		for (i = 0; i < CIK_SDMA_UCODE_SIZE; i++)
5118c2ecf20Sopenharmony_ci			WREG32(SDMA0_UCODE_DATA + SDMA1_REGISTER_OFFSET, be32_to_cpup(fw_data++));
5128c2ecf20Sopenharmony_ci		WREG32(SDMA0_UCODE_DATA + SDMA1_REGISTER_OFFSET, CIK_SDMA_UCODE_VERSION);
5138c2ecf20Sopenharmony_ci	}
5148c2ecf20Sopenharmony_ci
5158c2ecf20Sopenharmony_ci	WREG32(SDMA0_UCODE_ADDR + SDMA0_REGISTER_OFFSET, 0);
5168c2ecf20Sopenharmony_ci	WREG32(SDMA0_UCODE_ADDR + SDMA1_REGISTER_OFFSET, 0);
5178c2ecf20Sopenharmony_ci	return 0;
5188c2ecf20Sopenharmony_ci}
5198c2ecf20Sopenharmony_ci
5208c2ecf20Sopenharmony_ci/**
5218c2ecf20Sopenharmony_ci * cik_sdma_resume - setup and start the async dma engines
5228c2ecf20Sopenharmony_ci *
5238c2ecf20Sopenharmony_ci * @rdev: radeon_device pointer
5248c2ecf20Sopenharmony_ci *
5258c2ecf20Sopenharmony_ci * Set up the DMA engines and enable them (CIK).
5268c2ecf20Sopenharmony_ci * Returns 0 for success, error for failure.
5278c2ecf20Sopenharmony_ci */
5288c2ecf20Sopenharmony_ciint cik_sdma_resume(struct radeon_device *rdev)
5298c2ecf20Sopenharmony_ci{
5308c2ecf20Sopenharmony_ci	int r;
5318c2ecf20Sopenharmony_ci
5328c2ecf20Sopenharmony_ci	r = cik_sdma_load_microcode(rdev);
5338c2ecf20Sopenharmony_ci	if (r)
5348c2ecf20Sopenharmony_ci		return r;
5358c2ecf20Sopenharmony_ci
5368c2ecf20Sopenharmony_ci	/* unhalt the MEs */
5378c2ecf20Sopenharmony_ci	cik_sdma_enable(rdev, true);
5388c2ecf20Sopenharmony_ci
5398c2ecf20Sopenharmony_ci	/* start the gfx rings and rlc compute queues */
5408c2ecf20Sopenharmony_ci	r = cik_sdma_gfx_resume(rdev);
5418c2ecf20Sopenharmony_ci	if (r)
5428c2ecf20Sopenharmony_ci		return r;
5438c2ecf20Sopenharmony_ci	r = cik_sdma_rlc_resume(rdev);
5448c2ecf20Sopenharmony_ci	if (r)
5458c2ecf20Sopenharmony_ci		return r;
5468c2ecf20Sopenharmony_ci
5478c2ecf20Sopenharmony_ci	return 0;
5488c2ecf20Sopenharmony_ci}
5498c2ecf20Sopenharmony_ci
5508c2ecf20Sopenharmony_ci/**
5518c2ecf20Sopenharmony_ci * cik_sdma_fini - tear down the async dma engines
5528c2ecf20Sopenharmony_ci *
5538c2ecf20Sopenharmony_ci * @rdev: radeon_device pointer
5548c2ecf20Sopenharmony_ci *
5558c2ecf20Sopenharmony_ci * Stop the async dma engines and free the rings (CIK).
5568c2ecf20Sopenharmony_ci */
5578c2ecf20Sopenharmony_civoid cik_sdma_fini(struct radeon_device *rdev)
5588c2ecf20Sopenharmony_ci{
5598c2ecf20Sopenharmony_ci	/* halt the MEs */
5608c2ecf20Sopenharmony_ci	cik_sdma_enable(rdev, false);
5618c2ecf20Sopenharmony_ci	radeon_ring_fini(rdev, &rdev->ring[R600_RING_TYPE_DMA_INDEX]);
5628c2ecf20Sopenharmony_ci	radeon_ring_fini(rdev, &rdev->ring[CAYMAN_RING_TYPE_DMA1_INDEX]);
5638c2ecf20Sopenharmony_ci	/* XXX - compute dma queue tear down */
5648c2ecf20Sopenharmony_ci}
5658c2ecf20Sopenharmony_ci
5668c2ecf20Sopenharmony_ci/**
5678c2ecf20Sopenharmony_ci * cik_copy_dma - copy pages using the DMA engine
5688c2ecf20Sopenharmony_ci *
5698c2ecf20Sopenharmony_ci * @rdev: radeon_device pointer
5708c2ecf20Sopenharmony_ci * @src_offset: src GPU address
5718c2ecf20Sopenharmony_ci * @dst_offset: dst GPU address
5728c2ecf20Sopenharmony_ci * @num_gpu_pages: number of GPU pages to xfer
5738c2ecf20Sopenharmony_ci * @resv: reservation object to sync to
5748c2ecf20Sopenharmony_ci *
5758c2ecf20Sopenharmony_ci * Copy GPU paging using the DMA engine (CIK).
5768c2ecf20Sopenharmony_ci * Used by the radeon ttm implementation to move pages if
5778c2ecf20Sopenharmony_ci * registered as the asic copy callback.
5788c2ecf20Sopenharmony_ci */
5798c2ecf20Sopenharmony_cistruct radeon_fence *cik_copy_dma(struct radeon_device *rdev,
5808c2ecf20Sopenharmony_ci				  uint64_t src_offset, uint64_t dst_offset,
5818c2ecf20Sopenharmony_ci				  unsigned num_gpu_pages,
5828c2ecf20Sopenharmony_ci				  struct dma_resv *resv)
5838c2ecf20Sopenharmony_ci{
5848c2ecf20Sopenharmony_ci	struct radeon_fence *fence;
5858c2ecf20Sopenharmony_ci	struct radeon_sync sync;
5868c2ecf20Sopenharmony_ci	int ring_index = rdev->asic->copy.dma_ring_index;
5878c2ecf20Sopenharmony_ci	struct radeon_ring *ring = &rdev->ring[ring_index];
5888c2ecf20Sopenharmony_ci	u32 size_in_bytes, cur_size_in_bytes;
5898c2ecf20Sopenharmony_ci	int i, num_loops;
5908c2ecf20Sopenharmony_ci	int r = 0;
5918c2ecf20Sopenharmony_ci
5928c2ecf20Sopenharmony_ci	radeon_sync_create(&sync);
5938c2ecf20Sopenharmony_ci
5948c2ecf20Sopenharmony_ci	size_in_bytes = (num_gpu_pages << RADEON_GPU_PAGE_SHIFT);
5958c2ecf20Sopenharmony_ci	num_loops = DIV_ROUND_UP(size_in_bytes, 0x1fffff);
5968c2ecf20Sopenharmony_ci	r = radeon_ring_lock(rdev, ring, num_loops * 7 + 14);
5978c2ecf20Sopenharmony_ci	if (r) {
5988c2ecf20Sopenharmony_ci		DRM_ERROR("radeon: moving bo (%d).\n", r);
5998c2ecf20Sopenharmony_ci		radeon_sync_free(rdev, &sync, NULL);
6008c2ecf20Sopenharmony_ci		return ERR_PTR(r);
6018c2ecf20Sopenharmony_ci	}
6028c2ecf20Sopenharmony_ci
6038c2ecf20Sopenharmony_ci	radeon_sync_resv(rdev, &sync, resv, false);
6048c2ecf20Sopenharmony_ci	radeon_sync_rings(rdev, &sync, ring->idx);
6058c2ecf20Sopenharmony_ci
6068c2ecf20Sopenharmony_ci	for (i = 0; i < num_loops; i++) {
6078c2ecf20Sopenharmony_ci		cur_size_in_bytes = size_in_bytes;
6088c2ecf20Sopenharmony_ci		if (cur_size_in_bytes > 0x1fffff)
6098c2ecf20Sopenharmony_ci			cur_size_in_bytes = 0x1fffff;
6108c2ecf20Sopenharmony_ci		size_in_bytes -= cur_size_in_bytes;
6118c2ecf20Sopenharmony_ci		radeon_ring_write(ring, SDMA_PACKET(SDMA_OPCODE_COPY, SDMA_COPY_SUB_OPCODE_LINEAR, 0));
6128c2ecf20Sopenharmony_ci		radeon_ring_write(ring, cur_size_in_bytes);
6138c2ecf20Sopenharmony_ci		radeon_ring_write(ring, 0); /* src/dst endian swap */
6148c2ecf20Sopenharmony_ci		radeon_ring_write(ring, lower_32_bits(src_offset));
6158c2ecf20Sopenharmony_ci		radeon_ring_write(ring, upper_32_bits(src_offset));
6168c2ecf20Sopenharmony_ci		radeon_ring_write(ring, lower_32_bits(dst_offset));
6178c2ecf20Sopenharmony_ci		radeon_ring_write(ring, upper_32_bits(dst_offset));
6188c2ecf20Sopenharmony_ci		src_offset += cur_size_in_bytes;
6198c2ecf20Sopenharmony_ci		dst_offset += cur_size_in_bytes;
6208c2ecf20Sopenharmony_ci	}
6218c2ecf20Sopenharmony_ci
6228c2ecf20Sopenharmony_ci	r = radeon_fence_emit(rdev, &fence, ring->idx);
6238c2ecf20Sopenharmony_ci	if (r) {
6248c2ecf20Sopenharmony_ci		radeon_ring_unlock_undo(rdev, ring);
6258c2ecf20Sopenharmony_ci		radeon_sync_free(rdev, &sync, NULL);
6268c2ecf20Sopenharmony_ci		return ERR_PTR(r);
6278c2ecf20Sopenharmony_ci	}
6288c2ecf20Sopenharmony_ci
6298c2ecf20Sopenharmony_ci	radeon_ring_unlock_commit(rdev, ring, false);
6308c2ecf20Sopenharmony_ci	radeon_sync_free(rdev, &sync, fence);
6318c2ecf20Sopenharmony_ci
6328c2ecf20Sopenharmony_ci	return fence;
6338c2ecf20Sopenharmony_ci}
6348c2ecf20Sopenharmony_ci
6358c2ecf20Sopenharmony_ci/**
6368c2ecf20Sopenharmony_ci * cik_sdma_ring_test - simple async dma engine test
6378c2ecf20Sopenharmony_ci *
6388c2ecf20Sopenharmony_ci * @rdev: radeon_device pointer
6398c2ecf20Sopenharmony_ci * @ring: radeon_ring structure holding ring information
6408c2ecf20Sopenharmony_ci *
6418c2ecf20Sopenharmony_ci * Test the DMA engine by writing using it to write an
6428c2ecf20Sopenharmony_ci * value to memory. (CIK).
6438c2ecf20Sopenharmony_ci * Returns 0 for success, error for failure.
6448c2ecf20Sopenharmony_ci */
6458c2ecf20Sopenharmony_ciint cik_sdma_ring_test(struct radeon_device *rdev,
6468c2ecf20Sopenharmony_ci		       struct radeon_ring *ring)
6478c2ecf20Sopenharmony_ci{
6488c2ecf20Sopenharmony_ci	unsigned i;
6498c2ecf20Sopenharmony_ci	int r;
6508c2ecf20Sopenharmony_ci	unsigned index;
6518c2ecf20Sopenharmony_ci	u32 tmp;
6528c2ecf20Sopenharmony_ci	u64 gpu_addr;
6538c2ecf20Sopenharmony_ci
6548c2ecf20Sopenharmony_ci	if (ring->idx == R600_RING_TYPE_DMA_INDEX)
6558c2ecf20Sopenharmony_ci		index = R600_WB_DMA_RING_TEST_OFFSET;
6568c2ecf20Sopenharmony_ci	else
6578c2ecf20Sopenharmony_ci		index = CAYMAN_WB_DMA1_RING_TEST_OFFSET;
6588c2ecf20Sopenharmony_ci
6598c2ecf20Sopenharmony_ci	gpu_addr = rdev->wb.gpu_addr + index;
6608c2ecf20Sopenharmony_ci
6618c2ecf20Sopenharmony_ci	tmp = 0xCAFEDEAD;
6628c2ecf20Sopenharmony_ci	rdev->wb.wb[index/4] = cpu_to_le32(tmp);
6638c2ecf20Sopenharmony_ci
6648c2ecf20Sopenharmony_ci	r = radeon_ring_lock(rdev, ring, 5);
6658c2ecf20Sopenharmony_ci	if (r) {
6668c2ecf20Sopenharmony_ci		DRM_ERROR("radeon: dma failed to lock ring %d (%d).\n", ring->idx, r);
6678c2ecf20Sopenharmony_ci		return r;
6688c2ecf20Sopenharmony_ci	}
6698c2ecf20Sopenharmony_ci	radeon_ring_write(ring, SDMA_PACKET(SDMA_OPCODE_WRITE, SDMA_WRITE_SUB_OPCODE_LINEAR, 0));
6708c2ecf20Sopenharmony_ci	radeon_ring_write(ring, lower_32_bits(gpu_addr));
6718c2ecf20Sopenharmony_ci	radeon_ring_write(ring, upper_32_bits(gpu_addr));
6728c2ecf20Sopenharmony_ci	radeon_ring_write(ring, 1); /* number of DWs to follow */
6738c2ecf20Sopenharmony_ci	radeon_ring_write(ring, 0xDEADBEEF);
6748c2ecf20Sopenharmony_ci	radeon_ring_unlock_commit(rdev, ring, false);
6758c2ecf20Sopenharmony_ci
6768c2ecf20Sopenharmony_ci	for (i = 0; i < rdev->usec_timeout; i++) {
6778c2ecf20Sopenharmony_ci		tmp = le32_to_cpu(rdev->wb.wb[index/4]);
6788c2ecf20Sopenharmony_ci		if (tmp == 0xDEADBEEF)
6798c2ecf20Sopenharmony_ci			break;
6808c2ecf20Sopenharmony_ci		udelay(1);
6818c2ecf20Sopenharmony_ci	}
6828c2ecf20Sopenharmony_ci
6838c2ecf20Sopenharmony_ci	if (i < rdev->usec_timeout) {
6848c2ecf20Sopenharmony_ci		DRM_INFO("ring test on %d succeeded in %d usecs\n", ring->idx, i);
6858c2ecf20Sopenharmony_ci	} else {
6868c2ecf20Sopenharmony_ci		DRM_ERROR("radeon: ring %d test failed (0x%08X)\n",
6878c2ecf20Sopenharmony_ci			  ring->idx, tmp);
6888c2ecf20Sopenharmony_ci		r = -EINVAL;
6898c2ecf20Sopenharmony_ci	}
6908c2ecf20Sopenharmony_ci	return r;
6918c2ecf20Sopenharmony_ci}
6928c2ecf20Sopenharmony_ci
6938c2ecf20Sopenharmony_ci/**
6948c2ecf20Sopenharmony_ci * cik_sdma_ib_test - test an IB on the DMA engine
6958c2ecf20Sopenharmony_ci *
6968c2ecf20Sopenharmony_ci * @rdev: radeon_device pointer
6978c2ecf20Sopenharmony_ci * @ring: radeon_ring structure holding ring information
6988c2ecf20Sopenharmony_ci *
6998c2ecf20Sopenharmony_ci * Test a simple IB in the DMA ring (CIK).
7008c2ecf20Sopenharmony_ci * Returns 0 on success, error on failure.
7018c2ecf20Sopenharmony_ci */
7028c2ecf20Sopenharmony_ciint cik_sdma_ib_test(struct radeon_device *rdev, struct radeon_ring *ring)
7038c2ecf20Sopenharmony_ci{
7048c2ecf20Sopenharmony_ci	struct radeon_ib ib;
7058c2ecf20Sopenharmony_ci	unsigned i;
7068c2ecf20Sopenharmony_ci	unsigned index;
7078c2ecf20Sopenharmony_ci	int r;
7088c2ecf20Sopenharmony_ci	u32 tmp = 0;
7098c2ecf20Sopenharmony_ci	u64 gpu_addr;
7108c2ecf20Sopenharmony_ci
7118c2ecf20Sopenharmony_ci	if (ring->idx == R600_RING_TYPE_DMA_INDEX)
7128c2ecf20Sopenharmony_ci		index = R600_WB_DMA_RING_TEST_OFFSET;
7138c2ecf20Sopenharmony_ci	else
7148c2ecf20Sopenharmony_ci		index = CAYMAN_WB_DMA1_RING_TEST_OFFSET;
7158c2ecf20Sopenharmony_ci
7168c2ecf20Sopenharmony_ci	gpu_addr = rdev->wb.gpu_addr + index;
7178c2ecf20Sopenharmony_ci
7188c2ecf20Sopenharmony_ci	tmp = 0xCAFEDEAD;
7198c2ecf20Sopenharmony_ci	rdev->wb.wb[index/4] = cpu_to_le32(tmp);
7208c2ecf20Sopenharmony_ci
7218c2ecf20Sopenharmony_ci	r = radeon_ib_get(rdev, ring->idx, &ib, NULL, 256);
7228c2ecf20Sopenharmony_ci	if (r) {
7238c2ecf20Sopenharmony_ci		DRM_ERROR("radeon: failed to get ib (%d).\n", r);
7248c2ecf20Sopenharmony_ci		return r;
7258c2ecf20Sopenharmony_ci	}
7268c2ecf20Sopenharmony_ci
7278c2ecf20Sopenharmony_ci	ib.ptr[0] = SDMA_PACKET(SDMA_OPCODE_WRITE, SDMA_WRITE_SUB_OPCODE_LINEAR, 0);
7288c2ecf20Sopenharmony_ci	ib.ptr[1] = lower_32_bits(gpu_addr);
7298c2ecf20Sopenharmony_ci	ib.ptr[2] = upper_32_bits(gpu_addr);
7308c2ecf20Sopenharmony_ci	ib.ptr[3] = 1;
7318c2ecf20Sopenharmony_ci	ib.ptr[4] = 0xDEADBEEF;
7328c2ecf20Sopenharmony_ci	ib.length_dw = 5;
7338c2ecf20Sopenharmony_ci
7348c2ecf20Sopenharmony_ci	r = radeon_ib_schedule(rdev, &ib, NULL, false);
7358c2ecf20Sopenharmony_ci	if (r) {
7368c2ecf20Sopenharmony_ci		radeon_ib_free(rdev, &ib);
7378c2ecf20Sopenharmony_ci		DRM_ERROR("radeon: failed to schedule ib (%d).\n", r);
7388c2ecf20Sopenharmony_ci		return r;
7398c2ecf20Sopenharmony_ci	}
7408c2ecf20Sopenharmony_ci	r = radeon_fence_wait_timeout(ib.fence, false, usecs_to_jiffies(
7418c2ecf20Sopenharmony_ci		RADEON_USEC_IB_TEST_TIMEOUT));
7428c2ecf20Sopenharmony_ci	if (r < 0) {
7438c2ecf20Sopenharmony_ci		DRM_ERROR("radeon: fence wait failed (%d).\n", r);
7448c2ecf20Sopenharmony_ci		return r;
7458c2ecf20Sopenharmony_ci	} else if (r == 0) {
7468c2ecf20Sopenharmony_ci		DRM_ERROR("radeon: fence wait timed out.\n");
7478c2ecf20Sopenharmony_ci		return -ETIMEDOUT;
7488c2ecf20Sopenharmony_ci	}
7498c2ecf20Sopenharmony_ci	r = 0;
7508c2ecf20Sopenharmony_ci	for (i = 0; i < rdev->usec_timeout; i++) {
7518c2ecf20Sopenharmony_ci		tmp = le32_to_cpu(rdev->wb.wb[index/4]);
7528c2ecf20Sopenharmony_ci		if (tmp == 0xDEADBEEF)
7538c2ecf20Sopenharmony_ci			break;
7548c2ecf20Sopenharmony_ci		udelay(1);
7558c2ecf20Sopenharmony_ci	}
7568c2ecf20Sopenharmony_ci	if (i < rdev->usec_timeout) {
7578c2ecf20Sopenharmony_ci		DRM_INFO("ib test on ring %d succeeded in %u usecs\n", ib.fence->ring, i);
7588c2ecf20Sopenharmony_ci	} else {
7598c2ecf20Sopenharmony_ci		DRM_ERROR("radeon: ib test failed (0x%08X)\n", tmp);
7608c2ecf20Sopenharmony_ci		r = -EINVAL;
7618c2ecf20Sopenharmony_ci	}
7628c2ecf20Sopenharmony_ci	radeon_ib_free(rdev, &ib);
7638c2ecf20Sopenharmony_ci	return r;
7648c2ecf20Sopenharmony_ci}
7658c2ecf20Sopenharmony_ci
7668c2ecf20Sopenharmony_ci/**
7678c2ecf20Sopenharmony_ci * cik_sdma_is_lockup - Check if the DMA engine is locked up
7688c2ecf20Sopenharmony_ci *
7698c2ecf20Sopenharmony_ci * @rdev: radeon_device pointer
7708c2ecf20Sopenharmony_ci * @ring: radeon_ring structure holding ring information
7718c2ecf20Sopenharmony_ci *
7728c2ecf20Sopenharmony_ci * Check if the async DMA engine is locked up (CIK).
7738c2ecf20Sopenharmony_ci * Returns true if the engine appears to be locked up, false if not.
7748c2ecf20Sopenharmony_ci */
7758c2ecf20Sopenharmony_cibool cik_sdma_is_lockup(struct radeon_device *rdev, struct radeon_ring *ring)
7768c2ecf20Sopenharmony_ci{
7778c2ecf20Sopenharmony_ci	u32 reset_mask = cik_gpu_check_soft_reset(rdev);
7788c2ecf20Sopenharmony_ci	u32 mask;
7798c2ecf20Sopenharmony_ci
7808c2ecf20Sopenharmony_ci	if (ring->idx == R600_RING_TYPE_DMA_INDEX)
7818c2ecf20Sopenharmony_ci		mask = RADEON_RESET_DMA;
7828c2ecf20Sopenharmony_ci	else
7838c2ecf20Sopenharmony_ci		mask = RADEON_RESET_DMA1;
7848c2ecf20Sopenharmony_ci
7858c2ecf20Sopenharmony_ci	if (!(reset_mask & mask)) {
7868c2ecf20Sopenharmony_ci		radeon_ring_lockup_update(rdev, ring);
7878c2ecf20Sopenharmony_ci		return false;
7888c2ecf20Sopenharmony_ci	}
7898c2ecf20Sopenharmony_ci	return radeon_ring_test_lockup(rdev, ring);
7908c2ecf20Sopenharmony_ci}
7918c2ecf20Sopenharmony_ci
7928c2ecf20Sopenharmony_ci/**
7938c2ecf20Sopenharmony_ci * cik_sdma_vm_copy_pages - update PTEs by copying them from the GART
7948c2ecf20Sopenharmony_ci *
7958c2ecf20Sopenharmony_ci * @rdev: radeon_device pointer
7968c2ecf20Sopenharmony_ci * @ib: indirect buffer to fill with commands
7978c2ecf20Sopenharmony_ci * @pe: addr of the page entry
7988c2ecf20Sopenharmony_ci * @src: src addr to copy from
7998c2ecf20Sopenharmony_ci * @count: number of page entries to update
8008c2ecf20Sopenharmony_ci *
8018c2ecf20Sopenharmony_ci * Update PTEs by copying them from the GART using sDMA (CIK).
8028c2ecf20Sopenharmony_ci */
8038c2ecf20Sopenharmony_civoid cik_sdma_vm_copy_pages(struct radeon_device *rdev,
8048c2ecf20Sopenharmony_ci			    struct radeon_ib *ib,
8058c2ecf20Sopenharmony_ci			    uint64_t pe, uint64_t src,
8068c2ecf20Sopenharmony_ci			    unsigned count)
8078c2ecf20Sopenharmony_ci{
8088c2ecf20Sopenharmony_ci	while (count) {
8098c2ecf20Sopenharmony_ci		unsigned bytes = count * 8;
8108c2ecf20Sopenharmony_ci		if (bytes > 0x1FFFF8)
8118c2ecf20Sopenharmony_ci			bytes = 0x1FFFF8;
8128c2ecf20Sopenharmony_ci
8138c2ecf20Sopenharmony_ci		ib->ptr[ib->length_dw++] = SDMA_PACKET(SDMA_OPCODE_COPY,
8148c2ecf20Sopenharmony_ci			SDMA_WRITE_SUB_OPCODE_LINEAR, 0);
8158c2ecf20Sopenharmony_ci		ib->ptr[ib->length_dw++] = bytes;
8168c2ecf20Sopenharmony_ci		ib->ptr[ib->length_dw++] = 0; /* src/dst endian swap */
8178c2ecf20Sopenharmony_ci		ib->ptr[ib->length_dw++] = lower_32_bits(src);
8188c2ecf20Sopenharmony_ci		ib->ptr[ib->length_dw++] = upper_32_bits(src);
8198c2ecf20Sopenharmony_ci		ib->ptr[ib->length_dw++] = lower_32_bits(pe);
8208c2ecf20Sopenharmony_ci		ib->ptr[ib->length_dw++] = upper_32_bits(pe);
8218c2ecf20Sopenharmony_ci
8228c2ecf20Sopenharmony_ci		pe += bytes;
8238c2ecf20Sopenharmony_ci		src += bytes;
8248c2ecf20Sopenharmony_ci		count -= bytes / 8;
8258c2ecf20Sopenharmony_ci	}
8268c2ecf20Sopenharmony_ci}
8278c2ecf20Sopenharmony_ci
8288c2ecf20Sopenharmony_ci/**
8298c2ecf20Sopenharmony_ci * cik_sdma_vm_write_pages - update PTEs by writing them manually
8308c2ecf20Sopenharmony_ci *
8318c2ecf20Sopenharmony_ci * @rdev: radeon_device pointer
8328c2ecf20Sopenharmony_ci * @ib: indirect buffer to fill with commands
8338c2ecf20Sopenharmony_ci * @pe: addr of the page entry
8348c2ecf20Sopenharmony_ci * @addr: dst addr to write into pe
8358c2ecf20Sopenharmony_ci * @count: number of page entries to update
8368c2ecf20Sopenharmony_ci * @incr: increase next addr by incr bytes
8378c2ecf20Sopenharmony_ci * @flags: access flags
8388c2ecf20Sopenharmony_ci *
8398c2ecf20Sopenharmony_ci * Update PTEs by writing them manually using sDMA (CIK).
8408c2ecf20Sopenharmony_ci */
8418c2ecf20Sopenharmony_civoid cik_sdma_vm_write_pages(struct radeon_device *rdev,
8428c2ecf20Sopenharmony_ci			     struct radeon_ib *ib,
8438c2ecf20Sopenharmony_ci			     uint64_t pe,
8448c2ecf20Sopenharmony_ci			     uint64_t addr, unsigned count,
8458c2ecf20Sopenharmony_ci			     uint32_t incr, uint32_t flags)
8468c2ecf20Sopenharmony_ci{
8478c2ecf20Sopenharmony_ci	uint64_t value;
8488c2ecf20Sopenharmony_ci	unsigned ndw;
8498c2ecf20Sopenharmony_ci
8508c2ecf20Sopenharmony_ci	while (count) {
8518c2ecf20Sopenharmony_ci		ndw = count * 2;
8528c2ecf20Sopenharmony_ci		if (ndw > 0xFFFFE)
8538c2ecf20Sopenharmony_ci			ndw = 0xFFFFE;
8548c2ecf20Sopenharmony_ci
8558c2ecf20Sopenharmony_ci		/* for non-physically contiguous pages (system) */
8568c2ecf20Sopenharmony_ci		ib->ptr[ib->length_dw++] = SDMA_PACKET(SDMA_OPCODE_WRITE,
8578c2ecf20Sopenharmony_ci			SDMA_WRITE_SUB_OPCODE_LINEAR, 0);
8588c2ecf20Sopenharmony_ci		ib->ptr[ib->length_dw++] = pe;
8598c2ecf20Sopenharmony_ci		ib->ptr[ib->length_dw++] = upper_32_bits(pe);
8608c2ecf20Sopenharmony_ci		ib->ptr[ib->length_dw++] = ndw;
8618c2ecf20Sopenharmony_ci		for (; ndw > 0; ndw -= 2, --count, pe += 8) {
8628c2ecf20Sopenharmony_ci			if (flags & R600_PTE_SYSTEM) {
8638c2ecf20Sopenharmony_ci				value = radeon_vm_map_gart(rdev, addr);
8648c2ecf20Sopenharmony_ci			} else if (flags & R600_PTE_VALID) {
8658c2ecf20Sopenharmony_ci				value = addr;
8668c2ecf20Sopenharmony_ci			} else {
8678c2ecf20Sopenharmony_ci				value = 0;
8688c2ecf20Sopenharmony_ci			}
8698c2ecf20Sopenharmony_ci			addr += incr;
8708c2ecf20Sopenharmony_ci			value |= flags;
8718c2ecf20Sopenharmony_ci			ib->ptr[ib->length_dw++] = value;
8728c2ecf20Sopenharmony_ci			ib->ptr[ib->length_dw++] = upper_32_bits(value);
8738c2ecf20Sopenharmony_ci		}
8748c2ecf20Sopenharmony_ci	}
8758c2ecf20Sopenharmony_ci}
8768c2ecf20Sopenharmony_ci
8778c2ecf20Sopenharmony_ci/**
8788c2ecf20Sopenharmony_ci * cik_sdma_vm_set_pages - update the page tables using sDMA
8798c2ecf20Sopenharmony_ci *
8808c2ecf20Sopenharmony_ci * @rdev: radeon_device pointer
8818c2ecf20Sopenharmony_ci * @ib: indirect buffer to fill with commands
8828c2ecf20Sopenharmony_ci * @pe: addr of the page entry
8838c2ecf20Sopenharmony_ci * @addr: dst addr to write into pe
8848c2ecf20Sopenharmony_ci * @count: number of page entries to update
8858c2ecf20Sopenharmony_ci * @incr: increase next addr by incr bytes
8868c2ecf20Sopenharmony_ci * @flags: access flags
8878c2ecf20Sopenharmony_ci *
8888c2ecf20Sopenharmony_ci * Update the page tables using sDMA (CIK).
8898c2ecf20Sopenharmony_ci */
8908c2ecf20Sopenharmony_civoid cik_sdma_vm_set_pages(struct radeon_device *rdev,
8918c2ecf20Sopenharmony_ci			   struct radeon_ib *ib,
8928c2ecf20Sopenharmony_ci			   uint64_t pe,
8938c2ecf20Sopenharmony_ci			   uint64_t addr, unsigned count,
8948c2ecf20Sopenharmony_ci			   uint32_t incr, uint32_t flags)
8958c2ecf20Sopenharmony_ci{
8968c2ecf20Sopenharmony_ci	uint64_t value;
8978c2ecf20Sopenharmony_ci	unsigned ndw;
8988c2ecf20Sopenharmony_ci
8998c2ecf20Sopenharmony_ci	while (count) {
9008c2ecf20Sopenharmony_ci		ndw = count;
9018c2ecf20Sopenharmony_ci		if (ndw > 0x7FFFF)
9028c2ecf20Sopenharmony_ci			ndw = 0x7FFFF;
9038c2ecf20Sopenharmony_ci
9048c2ecf20Sopenharmony_ci		if (flags & R600_PTE_VALID)
9058c2ecf20Sopenharmony_ci			value = addr;
9068c2ecf20Sopenharmony_ci		else
9078c2ecf20Sopenharmony_ci			value = 0;
9088c2ecf20Sopenharmony_ci
9098c2ecf20Sopenharmony_ci		/* for physically contiguous pages (vram) */
9108c2ecf20Sopenharmony_ci		ib->ptr[ib->length_dw++] = SDMA_PACKET(SDMA_OPCODE_GENERATE_PTE_PDE, 0, 0);
9118c2ecf20Sopenharmony_ci		ib->ptr[ib->length_dw++] = pe; /* dst addr */
9128c2ecf20Sopenharmony_ci		ib->ptr[ib->length_dw++] = upper_32_bits(pe);
9138c2ecf20Sopenharmony_ci		ib->ptr[ib->length_dw++] = flags; /* mask */
9148c2ecf20Sopenharmony_ci		ib->ptr[ib->length_dw++] = 0;
9158c2ecf20Sopenharmony_ci		ib->ptr[ib->length_dw++] = value; /* value */
9168c2ecf20Sopenharmony_ci		ib->ptr[ib->length_dw++] = upper_32_bits(value);
9178c2ecf20Sopenharmony_ci		ib->ptr[ib->length_dw++] = incr; /* increment size */
9188c2ecf20Sopenharmony_ci		ib->ptr[ib->length_dw++] = 0;
9198c2ecf20Sopenharmony_ci		ib->ptr[ib->length_dw++] = ndw; /* number of entries */
9208c2ecf20Sopenharmony_ci
9218c2ecf20Sopenharmony_ci		pe += ndw * 8;
9228c2ecf20Sopenharmony_ci		addr += ndw * incr;
9238c2ecf20Sopenharmony_ci		count -= ndw;
9248c2ecf20Sopenharmony_ci	}
9258c2ecf20Sopenharmony_ci}
9268c2ecf20Sopenharmony_ci
9278c2ecf20Sopenharmony_ci/**
9288c2ecf20Sopenharmony_ci * cik_sdma_vm_pad_ib - pad the IB to the required number of dw
9298c2ecf20Sopenharmony_ci *
9308c2ecf20Sopenharmony_ci * @ib: indirect buffer to fill with padding
9318c2ecf20Sopenharmony_ci *
9328c2ecf20Sopenharmony_ci */
9338c2ecf20Sopenharmony_civoid cik_sdma_vm_pad_ib(struct radeon_ib *ib)
9348c2ecf20Sopenharmony_ci{
9358c2ecf20Sopenharmony_ci	while (ib->length_dw & 0x7)
9368c2ecf20Sopenharmony_ci		ib->ptr[ib->length_dw++] = SDMA_PACKET(SDMA_OPCODE_NOP, 0, 0);
9378c2ecf20Sopenharmony_ci}
9388c2ecf20Sopenharmony_ci
9398c2ecf20Sopenharmony_ci/**
9408c2ecf20Sopenharmony_ci * cik_dma_vm_flush - cik vm flush using sDMA
9418c2ecf20Sopenharmony_ci *
9428c2ecf20Sopenharmony_ci * @rdev: radeon_device pointer
9438c2ecf20Sopenharmony_ci *
9448c2ecf20Sopenharmony_ci * Update the page table base and flush the VM TLB
9458c2ecf20Sopenharmony_ci * using sDMA (CIK).
9468c2ecf20Sopenharmony_ci */
9478c2ecf20Sopenharmony_civoid cik_dma_vm_flush(struct radeon_device *rdev, struct radeon_ring *ring,
9488c2ecf20Sopenharmony_ci		      unsigned vm_id, uint64_t pd_addr)
9498c2ecf20Sopenharmony_ci{
9508c2ecf20Sopenharmony_ci	u32 extra_bits = (SDMA_POLL_REG_MEM_EXTRA_OP(0) |
9518c2ecf20Sopenharmony_ci			  SDMA_POLL_REG_MEM_EXTRA_FUNC(0)); /* always */
9528c2ecf20Sopenharmony_ci
9538c2ecf20Sopenharmony_ci	radeon_ring_write(ring, SDMA_PACKET(SDMA_OPCODE_SRBM_WRITE, 0, 0xf000));
9548c2ecf20Sopenharmony_ci	if (vm_id < 8) {
9558c2ecf20Sopenharmony_ci		radeon_ring_write(ring, (VM_CONTEXT0_PAGE_TABLE_BASE_ADDR + (vm_id << 2)) >> 2);
9568c2ecf20Sopenharmony_ci	} else {
9578c2ecf20Sopenharmony_ci		radeon_ring_write(ring, (VM_CONTEXT8_PAGE_TABLE_BASE_ADDR + ((vm_id - 8) << 2)) >> 2);
9588c2ecf20Sopenharmony_ci	}
9598c2ecf20Sopenharmony_ci	radeon_ring_write(ring, pd_addr >> 12);
9608c2ecf20Sopenharmony_ci
9618c2ecf20Sopenharmony_ci	/* update SH_MEM_* regs */
9628c2ecf20Sopenharmony_ci	radeon_ring_write(ring, SDMA_PACKET(SDMA_OPCODE_SRBM_WRITE, 0, 0xf000));
9638c2ecf20Sopenharmony_ci	radeon_ring_write(ring, SRBM_GFX_CNTL >> 2);
9648c2ecf20Sopenharmony_ci	radeon_ring_write(ring, VMID(vm_id));
9658c2ecf20Sopenharmony_ci
9668c2ecf20Sopenharmony_ci	radeon_ring_write(ring, SDMA_PACKET(SDMA_OPCODE_SRBM_WRITE, 0, 0xf000));
9678c2ecf20Sopenharmony_ci	radeon_ring_write(ring, SH_MEM_BASES >> 2);
9688c2ecf20Sopenharmony_ci	radeon_ring_write(ring, 0);
9698c2ecf20Sopenharmony_ci
9708c2ecf20Sopenharmony_ci	radeon_ring_write(ring, SDMA_PACKET(SDMA_OPCODE_SRBM_WRITE, 0, 0xf000));
9718c2ecf20Sopenharmony_ci	radeon_ring_write(ring, SH_MEM_CONFIG >> 2);
9728c2ecf20Sopenharmony_ci	radeon_ring_write(ring, 0);
9738c2ecf20Sopenharmony_ci
9748c2ecf20Sopenharmony_ci	radeon_ring_write(ring, SDMA_PACKET(SDMA_OPCODE_SRBM_WRITE, 0, 0xf000));
9758c2ecf20Sopenharmony_ci	radeon_ring_write(ring, SH_MEM_APE1_BASE >> 2);
9768c2ecf20Sopenharmony_ci	radeon_ring_write(ring, 1);
9778c2ecf20Sopenharmony_ci
9788c2ecf20Sopenharmony_ci	radeon_ring_write(ring, SDMA_PACKET(SDMA_OPCODE_SRBM_WRITE, 0, 0xf000));
9798c2ecf20Sopenharmony_ci	radeon_ring_write(ring, SH_MEM_APE1_LIMIT >> 2);
9808c2ecf20Sopenharmony_ci	radeon_ring_write(ring, 0);
9818c2ecf20Sopenharmony_ci
9828c2ecf20Sopenharmony_ci	radeon_ring_write(ring, SDMA_PACKET(SDMA_OPCODE_SRBM_WRITE, 0, 0xf000));
9838c2ecf20Sopenharmony_ci	radeon_ring_write(ring, SRBM_GFX_CNTL >> 2);
9848c2ecf20Sopenharmony_ci	radeon_ring_write(ring, VMID(0));
9858c2ecf20Sopenharmony_ci
9868c2ecf20Sopenharmony_ci	/* flush HDP */
9878c2ecf20Sopenharmony_ci	cik_sdma_hdp_flush_ring_emit(rdev, ring->idx);
9888c2ecf20Sopenharmony_ci
9898c2ecf20Sopenharmony_ci	/* flush TLB */
9908c2ecf20Sopenharmony_ci	radeon_ring_write(ring, SDMA_PACKET(SDMA_OPCODE_SRBM_WRITE, 0, 0xf000));
9918c2ecf20Sopenharmony_ci	radeon_ring_write(ring, VM_INVALIDATE_REQUEST >> 2);
9928c2ecf20Sopenharmony_ci	radeon_ring_write(ring, 1 << vm_id);
9938c2ecf20Sopenharmony_ci
9948c2ecf20Sopenharmony_ci	radeon_ring_write(ring, SDMA_PACKET(SDMA_OPCODE_POLL_REG_MEM, 0, extra_bits));
9958c2ecf20Sopenharmony_ci	radeon_ring_write(ring, VM_INVALIDATE_REQUEST >> 2);
9968c2ecf20Sopenharmony_ci	radeon_ring_write(ring, 0);
9978c2ecf20Sopenharmony_ci	radeon_ring_write(ring, 0); /* reference */
9988c2ecf20Sopenharmony_ci	radeon_ring_write(ring, 0); /* mask */
9998c2ecf20Sopenharmony_ci	radeon_ring_write(ring, (0xfff << 16) | 10); /* retry count, poll interval */
10008c2ecf20Sopenharmony_ci}
10018c2ecf20Sopenharmony_ci
1002