162306a36Sopenharmony_ci/*
262306a36Sopenharmony_ci * Copyright 2013 Advanced Micro Devices, Inc.
362306a36Sopenharmony_ci *
462306a36Sopenharmony_ci * Permission is hereby granted, free of charge, to any person obtaining a
562306a36Sopenharmony_ci * copy of this software and associated documentation files (the "Software"),
662306a36Sopenharmony_ci * to deal in the Software without restriction, including without limitation
762306a36Sopenharmony_ci * the rights to use, copy, modify, merge, publish, distribute, sublicense,
862306a36Sopenharmony_ci * and/or sell copies of the Software, and to permit persons to whom the
962306a36Sopenharmony_ci * Software is furnished to do so, subject to the following conditions:
1062306a36Sopenharmony_ci *
1162306a36Sopenharmony_ci * The above copyright notice and this permission notice shall be included in
1262306a36Sopenharmony_ci * all copies or substantial portions of the Software.
1362306a36Sopenharmony_ci *
1462306a36Sopenharmony_ci * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
1562306a36Sopenharmony_ci * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
1662306a36Sopenharmony_ci * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
1762306a36Sopenharmony_ci * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
1862306a36Sopenharmony_ci * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
1962306a36Sopenharmony_ci * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
2062306a36Sopenharmony_ci * OTHER DEALINGS IN THE SOFTWARE.
2162306a36Sopenharmony_ci *
2262306a36Sopenharmony_ci * Authors: Alex Deucher
2362306a36Sopenharmony_ci */
2462306a36Sopenharmony_ci#include <linux/firmware.h>
2562306a36Sopenharmony_ci
2662306a36Sopenharmony_ci#include "radeon.h"
2762306a36Sopenharmony_ci#include "radeon_ucode.h"
2862306a36Sopenharmony_ci#include "radeon_asic.h"
2962306a36Sopenharmony_ci#include "radeon_trace.h"
3062306a36Sopenharmony_ci#include "cik.h"
3162306a36Sopenharmony_ci#include "cikd.h"
3262306a36Sopenharmony_ci
3362306a36Sopenharmony_ci/* sdma */
3462306a36Sopenharmony_ci#define CIK_SDMA_UCODE_SIZE 1050
3562306a36Sopenharmony_ci#define CIK_SDMA_UCODE_VERSION 64
3662306a36Sopenharmony_ci
/*
 * sDMA - System DMA
 * Starting with CIK, the GPU has new asynchronous
 * DMA engines.  These engines are used for compute
 * and gfx.  There are two DMA engines (SDMA0, SDMA1)
 * and each one supports 1 ring buffer used for gfx
 * and 2 queues used for compute.
 *
 * The programming model is very similar to the CP
 * (ring buffer, IBs, etc.), but sDMA has its own
 * packet format that is different from the PM4 format
 * used by the CP. sDMA supports copying data, writing
 * embedded data, solid fills, and a number of other
 * things.  It also has support for tiling/detiling of
 * buffers.
 */
5362306a36Sopenharmony_ci
5462306a36Sopenharmony_ci/**
5562306a36Sopenharmony_ci * cik_sdma_get_rptr - get the current read pointer
5662306a36Sopenharmony_ci *
5762306a36Sopenharmony_ci * @rdev: radeon_device pointer
5862306a36Sopenharmony_ci * @ring: radeon ring pointer
5962306a36Sopenharmony_ci *
6062306a36Sopenharmony_ci * Get the current rptr from the hardware (CIK+).
6162306a36Sopenharmony_ci */
6262306a36Sopenharmony_ciuint32_t cik_sdma_get_rptr(struct radeon_device *rdev,
6362306a36Sopenharmony_ci			   struct radeon_ring *ring)
6462306a36Sopenharmony_ci{
6562306a36Sopenharmony_ci	u32 rptr, reg;
6662306a36Sopenharmony_ci
6762306a36Sopenharmony_ci	if (rdev->wb.enabled) {
6862306a36Sopenharmony_ci		rptr = rdev->wb.wb[ring->rptr_offs/4];
6962306a36Sopenharmony_ci	} else {
7062306a36Sopenharmony_ci		if (ring->idx == R600_RING_TYPE_DMA_INDEX)
7162306a36Sopenharmony_ci			reg = SDMA0_GFX_RB_RPTR + SDMA0_REGISTER_OFFSET;
7262306a36Sopenharmony_ci		else
7362306a36Sopenharmony_ci			reg = SDMA0_GFX_RB_RPTR + SDMA1_REGISTER_OFFSET;
7462306a36Sopenharmony_ci
7562306a36Sopenharmony_ci		rptr = RREG32(reg);
7662306a36Sopenharmony_ci	}
7762306a36Sopenharmony_ci
7862306a36Sopenharmony_ci	return (rptr & 0x3fffc) >> 2;
7962306a36Sopenharmony_ci}
8062306a36Sopenharmony_ci
8162306a36Sopenharmony_ci/**
8262306a36Sopenharmony_ci * cik_sdma_get_wptr - get the current write pointer
8362306a36Sopenharmony_ci *
8462306a36Sopenharmony_ci * @rdev: radeon_device pointer
8562306a36Sopenharmony_ci * @ring: radeon ring pointer
8662306a36Sopenharmony_ci *
8762306a36Sopenharmony_ci * Get the current wptr from the hardware (CIK+).
8862306a36Sopenharmony_ci */
8962306a36Sopenharmony_ciuint32_t cik_sdma_get_wptr(struct radeon_device *rdev,
9062306a36Sopenharmony_ci			   struct radeon_ring *ring)
9162306a36Sopenharmony_ci{
9262306a36Sopenharmony_ci	u32 reg;
9362306a36Sopenharmony_ci
9462306a36Sopenharmony_ci	if (ring->idx == R600_RING_TYPE_DMA_INDEX)
9562306a36Sopenharmony_ci		reg = SDMA0_GFX_RB_WPTR + SDMA0_REGISTER_OFFSET;
9662306a36Sopenharmony_ci	else
9762306a36Sopenharmony_ci		reg = SDMA0_GFX_RB_WPTR + SDMA1_REGISTER_OFFSET;
9862306a36Sopenharmony_ci
9962306a36Sopenharmony_ci	return (RREG32(reg) & 0x3fffc) >> 2;
10062306a36Sopenharmony_ci}
10162306a36Sopenharmony_ci
10262306a36Sopenharmony_ci/**
10362306a36Sopenharmony_ci * cik_sdma_set_wptr - commit the write pointer
10462306a36Sopenharmony_ci *
10562306a36Sopenharmony_ci * @rdev: radeon_device pointer
10662306a36Sopenharmony_ci * @ring: radeon ring pointer
10762306a36Sopenharmony_ci *
10862306a36Sopenharmony_ci * Write the wptr back to the hardware (CIK+).
10962306a36Sopenharmony_ci */
11062306a36Sopenharmony_civoid cik_sdma_set_wptr(struct radeon_device *rdev,
11162306a36Sopenharmony_ci		       struct radeon_ring *ring)
11262306a36Sopenharmony_ci{
11362306a36Sopenharmony_ci	u32 reg;
11462306a36Sopenharmony_ci
11562306a36Sopenharmony_ci	if (ring->idx == R600_RING_TYPE_DMA_INDEX)
11662306a36Sopenharmony_ci		reg = SDMA0_GFX_RB_WPTR + SDMA0_REGISTER_OFFSET;
11762306a36Sopenharmony_ci	else
11862306a36Sopenharmony_ci		reg = SDMA0_GFX_RB_WPTR + SDMA1_REGISTER_OFFSET;
11962306a36Sopenharmony_ci
12062306a36Sopenharmony_ci	WREG32(reg, (ring->wptr << 2) & 0x3fffc);
12162306a36Sopenharmony_ci	(void)RREG32(reg);
12262306a36Sopenharmony_ci}
12362306a36Sopenharmony_ci
12462306a36Sopenharmony_ci/**
12562306a36Sopenharmony_ci * cik_sdma_ring_ib_execute - Schedule an IB on the DMA engine
12662306a36Sopenharmony_ci *
12762306a36Sopenharmony_ci * @rdev: radeon_device pointer
12862306a36Sopenharmony_ci * @ib: IB object to schedule
12962306a36Sopenharmony_ci *
13062306a36Sopenharmony_ci * Schedule an IB in the DMA ring (CIK).
13162306a36Sopenharmony_ci */
13262306a36Sopenharmony_civoid cik_sdma_ring_ib_execute(struct radeon_device *rdev,
13362306a36Sopenharmony_ci			      struct radeon_ib *ib)
13462306a36Sopenharmony_ci{
13562306a36Sopenharmony_ci	struct radeon_ring *ring = &rdev->ring[ib->ring];
13662306a36Sopenharmony_ci	u32 extra_bits = (ib->vm ? ib->vm->ids[ib->ring].id : 0) & 0xf;
13762306a36Sopenharmony_ci
13862306a36Sopenharmony_ci	if (rdev->wb.enabled) {
13962306a36Sopenharmony_ci		u32 next_rptr = ring->wptr + 5;
14062306a36Sopenharmony_ci		while ((next_rptr & 7) != 4)
14162306a36Sopenharmony_ci			next_rptr++;
14262306a36Sopenharmony_ci		next_rptr += 4;
14362306a36Sopenharmony_ci		radeon_ring_write(ring, SDMA_PACKET(SDMA_OPCODE_WRITE, SDMA_WRITE_SUB_OPCODE_LINEAR, 0));
14462306a36Sopenharmony_ci		radeon_ring_write(ring, ring->next_rptr_gpu_addr & 0xfffffffc);
14562306a36Sopenharmony_ci		radeon_ring_write(ring, upper_32_bits(ring->next_rptr_gpu_addr));
14662306a36Sopenharmony_ci		radeon_ring_write(ring, 1); /* number of DWs to follow */
14762306a36Sopenharmony_ci		radeon_ring_write(ring, next_rptr);
14862306a36Sopenharmony_ci	}
14962306a36Sopenharmony_ci
15062306a36Sopenharmony_ci	/* IB packet must end on a 8 DW boundary */
15162306a36Sopenharmony_ci	while ((ring->wptr & 7) != 4)
15262306a36Sopenharmony_ci		radeon_ring_write(ring, SDMA_PACKET(SDMA_OPCODE_NOP, 0, 0));
15362306a36Sopenharmony_ci	radeon_ring_write(ring, SDMA_PACKET(SDMA_OPCODE_INDIRECT_BUFFER, 0, extra_bits));
15462306a36Sopenharmony_ci	radeon_ring_write(ring, ib->gpu_addr & 0xffffffe0); /* base must be 32 byte aligned */
15562306a36Sopenharmony_ci	radeon_ring_write(ring, upper_32_bits(ib->gpu_addr));
15662306a36Sopenharmony_ci	radeon_ring_write(ring, ib->length_dw);
15762306a36Sopenharmony_ci
15862306a36Sopenharmony_ci}
15962306a36Sopenharmony_ci
16062306a36Sopenharmony_ci/**
16162306a36Sopenharmony_ci * cik_sdma_hdp_flush_ring_emit - emit an hdp flush on the DMA ring
16262306a36Sopenharmony_ci *
16362306a36Sopenharmony_ci * @rdev: radeon_device pointer
16462306a36Sopenharmony_ci * @ridx: radeon ring index
16562306a36Sopenharmony_ci *
16662306a36Sopenharmony_ci * Emit an hdp flush packet on the requested DMA ring.
16762306a36Sopenharmony_ci */
16862306a36Sopenharmony_cistatic void cik_sdma_hdp_flush_ring_emit(struct radeon_device *rdev,
16962306a36Sopenharmony_ci					 int ridx)
17062306a36Sopenharmony_ci{
17162306a36Sopenharmony_ci	struct radeon_ring *ring = &rdev->ring[ridx];
17262306a36Sopenharmony_ci	u32 extra_bits = (SDMA_POLL_REG_MEM_EXTRA_OP(1) |
17362306a36Sopenharmony_ci			  SDMA_POLL_REG_MEM_EXTRA_FUNC(3)); /* == */
17462306a36Sopenharmony_ci	u32 ref_and_mask;
17562306a36Sopenharmony_ci
17662306a36Sopenharmony_ci	if (ridx == R600_RING_TYPE_DMA_INDEX)
17762306a36Sopenharmony_ci		ref_and_mask = SDMA0;
17862306a36Sopenharmony_ci	else
17962306a36Sopenharmony_ci		ref_and_mask = SDMA1;
18062306a36Sopenharmony_ci
18162306a36Sopenharmony_ci	radeon_ring_write(ring, SDMA_PACKET(SDMA_OPCODE_POLL_REG_MEM, 0, extra_bits));
18262306a36Sopenharmony_ci	radeon_ring_write(ring, GPU_HDP_FLUSH_DONE);
18362306a36Sopenharmony_ci	radeon_ring_write(ring, GPU_HDP_FLUSH_REQ);
18462306a36Sopenharmony_ci	radeon_ring_write(ring, ref_and_mask); /* reference */
18562306a36Sopenharmony_ci	radeon_ring_write(ring, ref_and_mask); /* mask */
18662306a36Sopenharmony_ci	radeon_ring_write(ring, (0xfff << 16) | 10); /* retry count, poll interval */
18762306a36Sopenharmony_ci}
18862306a36Sopenharmony_ci
18962306a36Sopenharmony_ci/**
19062306a36Sopenharmony_ci * cik_sdma_fence_ring_emit - emit a fence on the DMA ring
19162306a36Sopenharmony_ci *
19262306a36Sopenharmony_ci * @rdev: radeon_device pointer
19362306a36Sopenharmony_ci * @fence: radeon fence object
19462306a36Sopenharmony_ci *
19562306a36Sopenharmony_ci * Add a DMA fence packet to the ring to write
19662306a36Sopenharmony_ci * the fence seq number and DMA trap packet to generate
19762306a36Sopenharmony_ci * an interrupt if needed (CIK).
19862306a36Sopenharmony_ci */
19962306a36Sopenharmony_civoid cik_sdma_fence_ring_emit(struct radeon_device *rdev,
20062306a36Sopenharmony_ci			      struct radeon_fence *fence)
20162306a36Sopenharmony_ci{
20262306a36Sopenharmony_ci	struct radeon_ring *ring = &rdev->ring[fence->ring];
20362306a36Sopenharmony_ci	u64 addr = rdev->fence_drv[fence->ring].gpu_addr;
20462306a36Sopenharmony_ci
20562306a36Sopenharmony_ci	/* write the fence */
20662306a36Sopenharmony_ci	radeon_ring_write(ring, SDMA_PACKET(SDMA_OPCODE_FENCE, 0, 0));
20762306a36Sopenharmony_ci	radeon_ring_write(ring, lower_32_bits(addr));
20862306a36Sopenharmony_ci	radeon_ring_write(ring, upper_32_bits(addr));
20962306a36Sopenharmony_ci	radeon_ring_write(ring, fence->seq);
21062306a36Sopenharmony_ci	/* generate an interrupt */
21162306a36Sopenharmony_ci	radeon_ring_write(ring, SDMA_PACKET(SDMA_OPCODE_TRAP, 0, 0));
21262306a36Sopenharmony_ci	/* flush HDP */
21362306a36Sopenharmony_ci	cik_sdma_hdp_flush_ring_emit(rdev, fence->ring);
21462306a36Sopenharmony_ci}
21562306a36Sopenharmony_ci
21662306a36Sopenharmony_ci/**
21762306a36Sopenharmony_ci * cik_sdma_semaphore_ring_emit - emit a semaphore on the dma ring
21862306a36Sopenharmony_ci *
21962306a36Sopenharmony_ci * @rdev: radeon_device pointer
22062306a36Sopenharmony_ci * @ring: radeon_ring structure holding ring information
22162306a36Sopenharmony_ci * @semaphore: radeon semaphore object
22262306a36Sopenharmony_ci * @emit_wait: wait or signal semaphore
22362306a36Sopenharmony_ci *
22462306a36Sopenharmony_ci * Add a DMA semaphore packet to the ring wait on or signal
22562306a36Sopenharmony_ci * other rings (CIK).
22662306a36Sopenharmony_ci */
22762306a36Sopenharmony_cibool cik_sdma_semaphore_ring_emit(struct radeon_device *rdev,
22862306a36Sopenharmony_ci				  struct radeon_ring *ring,
22962306a36Sopenharmony_ci				  struct radeon_semaphore *semaphore,
23062306a36Sopenharmony_ci				  bool emit_wait)
23162306a36Sopenharmony_ci{
23262306a36Sopenharmony_ci	u64 addr = semaphore->gpu_addr;
23362306a36Sopenharmony_ci	u32 extra_bits = emit_wait ? 0 : SDMA_SEMAPHORE_EXTRA_S;
23462306a36Sopenharmony_ci
23562306a36Sopenharmony_ci	radeon_ring_write(ring, SDMA_PACKET(SDMA_OPCODE_SEMAPHORE, 0, extra_bits));
23662306a36Sopenharmony_ci	radeon_ring_write(ring, addr & 0xfffffff8);
23762306a36Sopenharmony_ci	radeon_ring_write(ring, upper_32_bits(addr));
23862306a36Sopenharmony_ci
23962306a36Sopenharmony_ci	return true;
24062306a36Sopenharmony_ci}
24162306a36Sopenharmony_ci
24262306a36Sopenharmony_ci/**
24362306a36Sopenharmony_ci * cik_sdma_gfx_stop - stop the gfx async dma engines
24462306a36Sopenharmony_ci *
24562306a36Sopenharmony_ci * @rdev: radeon_device pointer
24662306a36Sopenharmony_ci *
24762306a36Sopenharmony_ci * Stop the gfx async dma ring buffers (CIK).
24862306a36Sopenharmony_ci */
24962306a36Sopenharmony_cistatic void cik_sdma_gfx_stop(struct radeon_device *rdev)
25062306a36Sopenharmony_ci{
25162306a36Sopenharmony_ci	u32 rb_cntl, reg_offset;
25262306a36Sopenharmony_ci	int i;
25362306a36Sopenharmony_ci
25462306a36Sopenharmony_ci	if ((rdev->asic->copy.copy_ring_index == R600_RING_TYPE_DMA_INDEX) ||
25562306a36Sopenharmony_ci	    (rdev->asic->copy.copy_ring_index == CAYMAN_RING_TYPE_DMA1_INDEX))
25662306a36Sopenharmony_ci		radeon_ttm_set_active_vram_size(rdev, rdev->mc.visible_vram_size);
25762306a36Sopenharmony_ci
25862306a36Sopenharmony_ci	for (i = 0; i < 2; i++) {
25962306a36Sopenharmony_ci		if (i == 0)
26062306a36Sopenharmony_ci			reg_offset = SDMA0_REGISTER_OFFSET;
26162306a36Sopenharmony_ci		else
26262306a36Sopenharmony_ci			reg_offset = SDMA1_REGISTER_OFFSET;
26362306a36Sopenharmony_ci		rb_cntl = RREG32(SDMA0_GFX_RB_CNTL + reg_offset);
26462306a36Sopenharmony_ci		rb_cntl &= ~SDMA_RB_ENABLE;
26562306a36Sopenharmony_ci		WREG32(SDMA0_GFX_RB_CNTL + reg_offset, rb_cntl);
26662306a36Sopenharmony_ci		WREG32(SDMA0_GFX_IB_CNTL + reg_offset, 0);
26762306a36Sopenharmony_ci	}
26862306a36Sopenharmony_ci	rdev->ring[R600_RING_TYPE_DMA_INDEX].ready = false;
26962306a36Sopenharmony_ci	rdev->ring[CAYMAN_RING_TYPE_DMA1_INDEX].ready = false;
27062306a36Sopenharmony_ci
27162306a36Sopenharmony_ci	/* FIXME use something else than big hammer but after few days can not
27262306a36Sopenharmony_ci	 * seem to find good combination so reset SDMA blocks as it seems we
27362306a36Sopenharmony_ci	 * do not shut them down properly. This fix hibernation and does not
27462306a36Sopenharmony_ci	 * affect suspend to ram.
27562306a36Sopenharmony_ci	 */
27662306a36Sopenharmony_ci	WREG32(SRBM_SOFT_RESET, SOFT_RESET_SDMA | SOFT_RESET_SDMA1);
27762306a36Sopenharmony_ci	(void)RREG32(SRBM_SOFT_RESET);
27862306a36Sopenharmony_ci	udelay(50);
27962306a36Sopenharmony_ci	WREG32(SRBM_SOFT_RESET, 0);
28062306a36Sopenharmony_ci	(void)RREG32(SRBM_SOFT_RESET);
28162306a36Sopenharmony_ci}
28262306a36Sopenharmony_ci
/**
 * cik_sdma_rlc_stop - stop the compute async dma engines
 *
 * @rdev: radeon_device pointer
 *
 * Placeholder for stopping the compute async dma queues (CIK).
 * Currently does nothing.
 */
static void cik_sdma_rlc_stop(struct radeon_device *rdev)
{
	/* XXX todo: not implemented yet */
}
29462306a36Sopenharmony_ci
29562306a36Sopenharmony_ci/**
29662306a36Sopenharmony_ci * cik_sdma_ctx_switch_enable - enable/disable sdma engine preemption
29762306a36Sopenharmony_ci *
29862306a36Sopenharmony_ci * @rdev: radeon_device pointer
29962306a36Sopenharmony_ci * @enable: enable/disable preemption.
30062306a36Sopenharmony_ci *
30162306a36Sopenharmony_ci * Halt or unhalt the async dma engines (CIK).
30262306a36Sopenharmony_ci */
30362306a36Sopenharmony_cistatic void cik_sdma_ctx_switch_enable(struct radeon_device *rdev, bool enable)
30462306a36Sopenharmony_ci{
30562306a36Sopenharmony_ci	uint32_t reg_offset, value;
30662306a36Sopenharmony_ci	int i;
30762306a36Sopenharmony_ci
30862306a36Sopenharmony_ci	for (i = 0; i < 2; i++) {
30962306a36Sopenharmony_ci		if (i == 0)
31062306a36Sopenharmony_ci			reg_offset = SDMA0_REGISTER_OFFSET;
31162306a36Sopenharmony_ci		else
31262306a36Sopenharmony_ci			reg_offset = SDMA1_REGISTER_OFFSET;
31362306a36Sopenharmony_ci		value = RREG32(SDMA0_CNTL + reg_offset);
31462306a36Sopenharmony_ci		if (enable)
31562306a36Sopenharmony_ci			value |= AUTO_CTXSW_ENABLE;
31662306a36Sopenharmony_ci		else
31762306a36Sopenharmony_ci			value &= ~AUTO_CTXSW_ENABLE;
31862306a36Sopenharmony_ci		WREG32(SDMA0_CNTL + reg_offset, value);
31962306a36Sopenharmony_ci	}
32062306a36Sopenharmony_ci}
32162306a36Sopenharmony_ci
32262306a36Sopenharmony_ci/**
32362306a36Sopenharmony_ci * cik_sdma_enable - stop the async dma engines
32462306a36Sopenharmony_ci *
32562306a36Sopenharmony_ci * @rdev: radeon_device pointer
32662306a36Sopenharmony_ci * @enable: enable/disable the DMA MEs.
32762306a36Sopenharmony_ci *
32862306a36Sopenharmony_ci * Halt or unhalt the async dma engines (CIK).
32962306a36Sopenharmony_ci */
33062306a36Sopenharmony_civoid cik_sdma_enable(struct radeon_device *rdev, bool enable)
33162306a36Sopenharmony_ci{
33262306a36Sopenharmony_ci	u32 me_cntl, reg_offset;
33362306a36Sopenharmony_ci	int i;
33462306a36Sopenharmony_ci
33562306a36Sopenharmony_ci	if (!enable) {
33662306a36Sopenharmony_ci		cik_sdma_gfx_stop(rdev);
33762306a36Sopenharmony_ci		cik_sdma_rlc_stop(rdev);
33862306a36Sopenharmony_ci	}
33962306a36Sopenharmony_ci
34062306a36Sopenharmony_ci	for (i = 0; i < 2; i++) {
34162306a36Sopenharmony_ci		if (i == 0)
34262306a36Sopenharmony_ci			reg_offset = SDMA0_REGISTER_OFFSET;
34362306a36Sopenharmony_ci		else
34462306a36Sopenharmony_ci			reg_offset = SDMA1_REGISTER_OFFSET;
34562306a36Sopenharmony_ci		me_cntl = RREG32(SDMA0_ME_CNTL + reg_offset);
34662306a36Sopenharmony_ci		if (enable)
34762306a36Sopenharmony_ci			me_cntl &= ~SDMA_HALT;
34862306a36Sopenharmony_ci		else
34962306a36Sopenharmony_ci			me_cntl |= SDMA_HALT;
35062306a36Sopenharmony_ci		WREG32(SDMA0_ME_CNTL + reg_offset, me_cntl);
35162306a36Sopenharmony_ci	}
35262306a36Sopenharmony_ci
35362306a36Sopenharmony_ci	cik_sdma_ctx_switch_enable(rdev, enable);
35462306a36Sopenharmony_ci}
35562306a36Sopenharmony_ci
35662306a36Sopenharmony_ci/**
35762306a36Sopenharmony_ci * cik_sdma_gfx_resume - setup and start the async dma engines
35862306a36Sopenharmony_ci *
35962306a36Sopenharmony_ci * @rdev: radeon_device pointer
36062306a36Sopenharmony_ci *
36162306a36Sopenharmony_ci * Set up the gfx DMA ring buffers and enable them (CIK).
36262306a36Sopenharmony_ci * Returns 0 for success, error for failure.
36362306a36Sopenharmony_ci */
36462306a36Sopenharmony_cistatic int cik_sdma_gfx_resume(struct radeon_device *rdev)
36562306a36Sopenharmony_ci{
36662306a36Sopenharmony_ci	struct radeon_ring *ring;
36762306a36Sopenharmony_ci	u32 rb_cntl, ib_cntl;
36862306a36Sopenharmony_ci	u32 rb_bufsz;
36962306a36Sopenharmony_ci	u32 reg_offset, wb_offset;
37062306a36Sopenharmony_ci	int i, r;
37162306a36Sopenharmony_ci
37262306a36Sopenharmony_ci	for (i = 0; i < 2; i++) {
37362306a36Sopenharmony_ci		if (i == 0) {
37462306a36Sopenharmony_ci			ring = &rdev->ring[R600_RING_TYPE_DMA_INDEX];
37562306a36Sopenharmony_ci			reg_offset = SDMA0_REGISTER_OFFSET;
37662306a36Sopenharmony_ci			wb_offset = R600_WB_DMA_RPTR_OFFSET;
37762306a36Sopenharmony_ci		} else {
37862306a36Sopenharmony_ci			ring = &rdev->ring[CAYMAN_RING_TYPE_DMA1_INDEX];
37962306a36Sopenharmony_ci			reg_offset = SDMA1_REGISTER_OFFSET;
38062306a36Sopenharmony_ci			wb_offset = CAYMAN_WB_DMA1_RPTR_OFFSET;
38162306a36Sopenharmony_ci		}
38262306a36Sopenharmony_ci
38362306a36Sopenharmony_ci		WREG32(SDMA0_SEM_INCOMPLETE_TIMER_CNTL + reg_offset, 0);
38462306a36Sopenharmony_ci		WREG32(SDMA0_SEM_WAIT_FAIL_TIMER_CNTL + reg_offset, 0);
38562306a36Sopenharmony_ci
38662306a36Sopenharmony_ci		/* Set ring buffer size in dwords */
38762306a36Sopenharmony_ci		rb_bufsz = order_base_2(ring->ring_size / 4);
38862306a36Sopenharmony_ci		rb_cntl = rb_bufsz << 1;
38962306a36Sopenharmony_ci#ifdef __BIG_ENDIAN
39062306a36Sopenharmony_ci		rb_cntl |= SDMA_RB_SWAP_ENABLE | SDMA_RPTR_WRITEBACK_SWAP_ENABLE;
39162306a36Sopenharmony_ci#endif
39262306a36Sopenharmony_ci		WREG32(SDMA0_GFX_RB_CNTL + reg_offset, rb_cntl);
39362306a36Sopenharmony_ci
39462306a36Sopenharmony_ci		/* Initialize the ring buffer's read and write pointers */
39562306a36Sopenharmony_ci		WREG32(SDMA0_GFX_RB_RPTR + reg_offset, 0);
39662306a36Sopenharmony_ci		WREG32(SDMA0_GFX_RB_WPTR + reg_offset, 0);
39762306a36Sopenharmony_ci
39862306a36Sopenharmony_ci		/* set the wb address whether it's enabled or not */
39962306a36Sopenharmony_ci		WREG32(SDMA0_GFX_RB_RPTR_ADDR_HI + reg_offset,
40062306a36Sopenharmony_ci		       upper_32_bits(rdev->wb.gpu_addr + wb_offset) & 0xFFFFFFFF);
40162306a36Sopenharmony_ci		WREG32(SDMA0_GFX_RB_RPTR_ADDR_LO + reg_offset,
40262306a36Sopenharmony_ci		       ((rdev->wb.gpu_addr + wb_offset) & 0xFFFFFFFC));
40362306a36Sopenharmony_ci
40462306a36Sopenharmony_ci		if (rdev->wb.enabled)
40562306a36Sopenharmony_ci			rb_cntl |= SDMA_RPTR_WRITEBACK_ENABLE;
40662306a36Sopenharmony_ci
40762306a36Sopenharmony_ci		WREG32(SDMA0_GFX_RB_BASE + reg_offset, ring->gpu_addr >> 8);
40862306a36Sopenharmony_ci		WREG32(SDMA0_GFX_RB_BASE_HI + reg_offset, ring->gpu_addr >> 40);
40962306a36Sopenharmony_ci
41062306a36Sopenharmony_ci		ring->wptr = 0;
41162306a36Sopenharmony_ci		WREG32(SDMA0_GFX_RB_WPTR + reg_offset, ring->wptr << 2);
41262306a36Sopenharmony_ci
41362306a36Sopenharmony_ci		/* enable DMA RB */
41462306a36Sopenharmony_ci		WREG32(SDMA0_GFX_RB_CNTL + reg_offset, rb_cntl | SDMA_RB_ENABLE);
41562306a36Sopenharmony_ci
41662306a36Sopenharmony_ci		ib_cntl = SDMA_IB_ENABLE;
41762306a36Sopenharmony_ci#ifdef __BIG_ENDIAN
41862306a36Sopenharmony_ci		ib_cntl |= SDMA_IB_SWAP_ENABLE;
41962306a36Sopenharmony_ci#endif
42062306a36Sopenharmony_ci		/* enable DMA IBs */
42162306a36Sopenharmony_ci		WREG32(SDMA0_GFX_IB_CNTL + reg_offset, ib_cntl);
42262306a36Sopenharmony_ci
42362306a36Sopenharmony_ci		ring->ready = true;
42462306a36Sopenharmony_ci
42562306a36Sopenharmony_ci		r = radeon_ring_test(rdev, ring->idx, ring);
42662306a36Sopenharmony_ci		if (r) {
42762306a36Sopenharmony_ci			ring->ready = false;
42862306a36Sopenharmony_ci			return r;
42962306a36Sopenharmony_ci		}
43062306a36Sopenharmony_ci	}
43162306a36Sopenharmony_ci
43262306a36Sopenharmony_ci	if ((rdev->asic->copy.copy_ring_index == R600_RING_TYPE_DMA_INDEX) ||
43362306a36Sopenharmony_ci	    (rdev->asic->copy.copy_ring_index == CAYMAN_RING_TYPE_DMA1_INDEX))
43462306a36Sopenharmony_ci		radeon_ttm_set_active_vram_size(rdev, rdev->mc.real_vram_size);
43562306a36Sopenharmony_ci
43662306a36Sopenharmony_ci	return 0;
43762306a36Sopenharmony_ci}
43862306a36Sopenharmony_ci
/**
 * cik_sdma_rlc_resume - setup and start the async dma engines
 *
 * @rdev: radeon_device pointer
 *
 * Placeholder for setting up and enabling the compute DMA queues (CIK).
 * Currently does nothing.
 * Returns 0 (success) unconditionally.
 */
static int cik_sdma_rlc_resume(struct radeon_device *rdev)
{
	/* XXX todo: not implemented yet */
	return 0;
}
45262306a36Sopenharmony_ci
45362306a36Sopenharmony_ci/**
45462306a36Sopenharmony_ci * cik_sdma_load_microcode - load the sDMA ME ucode
45562306a36Sopenharmony_ci *
45662306a36Sopenharmony_ci * @rdev: radeon_device pointer
45762306a36Sopenharmony_ci *
45862306a36Sopenharmony_ci * Loads the sDMA0/1 ucode.
45962306a36Sopenharmony_ci * Returns 0 for success, -EINVAL if the ucode is not available.
46062306a36Sopenharmony_ci */
46162306a36Sopenharmony_cistatic int cik_sdma_load_microcode(struct radeon_device *rdev)
46262306a36Sopenharmony_ci{
46362306a36Sopenharmony_ci	int i;
46462306a36Sopenharmony_ci
46562306a36Sopenharmony_ci	if (!rdev->sdma_fw)
46662306a36Sopenharmony_ci		return -EINVAL;
46762306a36Sopenharmony_ci
46862306a36Sopenharmony_ci	/* halt the MEs */
46962306a36Sopenharmony_ci	cik_sdma_enable(rdev, false);
47062306a36Sopenharmony_ci
47162306a36Sopenharmony_ci	if (rdev->new_fw) {
47262306a36Sopenharmony_ci		const struct sdma_firmware_header_v1_0 *hdr =
47362306a36Sopenharmony_ci			(const struct sdma_firmware_header_v1_0 *)rdev->sdma_fw->data;
47462306a36Sopenharmony_ci		const __le32 *fw_data;
47562306a36Sopenharmony_ci		u32 fw_size;
47662306a36Sopenharmony_ci
47762306a36Sopenharmony_ci		radeon_ucode_print_sdma_hdr(&hdr->header);
47862306a36Sopenharmony_ci
47962306a36Sopenharmony_ci		/* sdma0 */
48062306a36Sopenharmony_ci		fw_data = (const __le32 *)
48162306a36Sopenharmony_ci			(rdev->sdma_fw->data + le32_to_cpu(hdr->header.ucode_array_offset_bytes));
48262306a36Sopenharmony_ci		fw_size = le32_to_cpu(hdr->header.ucode_size_bytes) / 4;
48362306a36Sopenharmony_ci		WREG32(SDMA0_UCODE_ADDR + SDMA0_REGISTER_OFFSET, 0);
48462306a36Sopenharmony_ci		for (i = 0; i < fw_size; i++)
48562306a36Sopenharmony_ci			WREG32(SDMA0_UCODE_DATA + SDMA0_REGISTER_OFFSET, le32_to_cpup(fw_data++));
48662306a36Sopenharmony_ci		WREG32(SDMA0_UCODE_DATA + SDMA0_REGISTER_OFFSET, CIK_SDMA_UCODE_VERSION);
48762306a36Sopenharmony_ci
48862306a36Sopenharmony_ci		/* sdma1 */
48962306a36Sopenharmony_ci		fw_data = (const __le32 *)
49062306a36Sopenharmony_ci			(rdev->sdma_fw->data + le32_to_cpu(hdr->header.ucode_array_offset_bytes));
49162306a36Sopenharmony_ci		fw_size = le32_to_cpu(hdr->header.ucode_size_bytes) / 4;
49262306a36Sopenharmony_ci		WREG32(SDMA0_UCODE_ADDR + SDMA1_REGISTER_OFFSET, 0);
49362306a36Sopenharmony_ci		for (i = 0; i < fw_size; i++)
49462306a36Sopenharmony_ci			WREG32(SDMA0_UCODE_DATA + SDMA1_REGISTER_OFFSET, le32_to_cpup(fw_data++));
49562306a36Sopenharmony_ci		WREG32(SDMA0_UCODE_DATA + SDMA1_REGISTER_OFFSET, CIK_SDMA_UCODE_VERSION);
49662306a36Sopenharmony_ci	} else {
49762306a36Sopenharmony_ci		const __be32 *fw_data;
49862306a36Sopenharmony_ci
49962306a36Sopenharmony_ci		/* sdma0 */
50062306a36Sopenharmony_ci		fw_data = (const __be32 *)rdev->sdma_fw->data;
50162306a36Sopenharmony_ci		WREG32(SDMA0_UCODE_ADDR + SDMA0_REGISTER_OFFSET, 0);
50262306a36Sopenharmony_ci		for (i = 0; i < CIK_SDMA_UCODE_SIZE; i++)
50362306a36Sopenharmony_ci			WREG32(SDMA0_UCODE_DATA + SDMA0_REGISTER_OFFSET, be32_to_cpup(fw_data++));
50462306a36Sopenharmony_ci		WREG32(SDMA0_UCODE_DATA + SDMA0_REGISTER_OFFSET, CIK_SDMA_UCODE_VERSION);
50562306a36Sopenharmony_ci
50662306a36Sopenharmony_ci		/* sdma1 */
50762306a36Sopenharmony_ci		fw_data = (const __be32 *)rdev->sdma_fw->data;
50862306a36Sopenharmony_ci		WREG32(SDMA0_UCODE_ADDR + SDMA1_REGISTER_OFFSET, 0);
50962306a36Sopenharmony_ci		for (i = 0; i < CIK_SDMA_UCODE_SIZE; i++)
51062306a36Sopenharmony_ci			WREG32(SDMA0_UCODE_DATA + SDMA1_REGISTER_OFFSET, be32_to_cpup(fw_data++));
51162306a36Sopenharmony_ci		WREG32(SDMA0_UCODE_DATA + SDMA1_REGISTER_OFFSET, CIK_SDMA_UCODE_VERSION);
51262306a36Sopenharmony_ci	}
51362306a36Sopenharmony_ci
51462306a36Sopenharmony_ci	WREG32(SDMA0_UCODE_ADDR + SDMA0_REGISTER_OFFSET, 0);
51562306a36Sopenharmony_ci	WREG32(SDMA0_UCODE_ADDR + SDMA1_REGISTER_OFFSET, 0);
51662306a36Sopenharmony_ci	return 0;
51762306a36Sopenharmony_ci}
51862306a36Sopenharmony_ci
51962306a36Sopenharmony_ci/**
52062306a36Sopenharmony_ci * cik_sdma_resume - setup and start the async dma engines
52162306a36Sopenharmony_ci *
52262306a36Sopenharmony_ci * @rdev: radeon_device pointer
52362306a36Sopenharmony_ci *
52462306a36Sopenharmony_ci * Set up the DMA engines and enable them (CIK).
52562306a36Sopenharmony_ci * Returns 0 for success, error for failure.
52662306a36Sopenharmony_ci */
52762306a36Sopenharmony_ciint cik_sdma_resume(struct radeon_device *rdev)
52862306a36Sopenharmony_ci{
52962306a36Sopenharmony_ci	int r;
53062306a36Sopenharmony_ci
53162306a36Sopenharmony_ci	r = cik_sdma_load_microcode(rdev);
53262306a36Sopenharmony_ci	if (r)
53362306a36Sopenharmony_ci		return r;
53462306a36Sopenharmony_ci
53562306a36Sopenharmony_ci	/* unhalt the MEs */
53662306a36Sopenharmony_ci	cik_sdma_enable(rdev, true);
53762306a36Sopenharmony_ci
53862306a36Sopenharmony_ci	/* start the gfx rings and rlc compute queues */
53962306a36Sopenharmony_ci	r = cik_sdma_gfx_resume(rdev);
54062306a36Sopenharmony_ci	if (r)
54162306a36Sopenharmony_ci		return r;
54262306a36Sopenharmony_ci	r = cik_sdma_rlc_resume(rdev);
54362306a36Sopenharmony_ci	if (r)
54462306a36Sopenharmony_ci		return r;
54562306a36Sopenharmony_ci
54662306a36Sopenharmony_ci	return 0;
54762306a36Sopenharmony_ci}
54862306a36Sopenharmony_ci
54962306a36Sopenharmony_ci/**
55062306a36Sopenharmony_ci * cik_sdma_fini - tear down the async dma engines
55162306a36Sopenharmony_ci *
55262306a36Sopenharmony_ci * @rdev: radeon_device pointer
55362306a36Sopenharmony_ci *
55462306a36Sopenharmony_ci * Stop the async dma engines and free the rings (CIK).
55562306a36Sopenharmony_ci */
55662306a36Sopenharmony_civoid cik_sdma_fini(struct radeon_device *rdev)
55762306a36Sopenharmony_ci{
55862306a36Sopenharmony_ci	/* halt the MEs */
55962306a36Sopenharmony_ci	cik_sdma_enable(rdev, false);
56062306a36Sopenharmony_ci	radeon_ring_fini(rdev, &rdev->ring[R600_RING_TYPE_DMA_INDEX]);
56162306a36Sopenharmony_ci	radeon_ring_fini(rdev, &rdev->ring[CAYMAN_RING_TYPE_DMA1_INDEX]);
56262306a36Sopenharmony_ci	/* XXX - compute dma queue tear down */
56362306a36Sopenharmony_ci}
56462306a36Sopenharmony_ci
56562306a36Sopenharmony_ci/**
56662306a36Sopenharmony_ci * cik_copy_dma - copy pages using the DMA engine
56762306a36Sopenharmony_ci *
56862306a36Sopenharmony_ci * @rdev: radeon_device pointer
56962306a36Sopenharmony_ci * @src_offset: src GPU address
57062306a36Sopenharmony_ci * @dst_offset: dst GPU address
57162306a36Sopenharmony_ci * @num_gpu_pages: number of GPU pages to xfer
57262306a36Sopenharmony_ci * @resv: reservation object to sync to
57362306a36Sopenharmony_ci *
57462306a36Sopenharmony_ci * Copy GPU paging using the DMA engine (CIK).
57562306a36Sopenharmony_ci * Used by the radeon ttm implementation to move pages if
57662306a36Sopenharmony_ci * registered as the asic copy callback.
57762306a36Sopenharmony_ci */
57862306a36Sopenharmony_cistruct radeon_fence *cik_copy_dma(struct radeon_device *rdev,
57962306a36Sopenharmony_ci				  uint64_t src_offset, uint64_t dst_offset,
58062306a36Sopenharmony_ci				  unsigned num_gpu_pages,
58162306a36Sopenharmony_ci				  struct dma_resv *resv)
58262306a36Sopenharmony_ci{
58362306a36Sopenharmony_ci	struct radeon_fence *fence;
58462306a36Sopenharmony_ci	struct radeon_sync sync;
58562306a36Sopenharmony_ci	int ring_index = rdev->asic->copy.dma_ring_index;
58662306a36Sopenharmony_ci	struct radeon_ring *ring = &rdev->ring[ring_index];
58762306a36Sopenharmony_ci	u32 size_in_bytes, cur_size_in_bytes;
58862306a36Sopenharmony_ci	int i, num_loops;
58962306a36Sopenharmony_ci	int r = 0;
59062306a36Sopenharmony_ci
59162306a36Sopenharmony_ci	radeon_sync_create(&sync);
59262306a36Sopenharmony_ci
59362306a36Sopenharmony_ci	size_in_bytes = (num_gpu_pages << RADEON_GPU_PAGE_SHIFT);
59462306a36Sopenharmony_ci	num_loops = DIV_ROUND_UP(size_in_bytes, 0x1fffff);
59562306a36Sopenharmony_ci	r = radeon_ring_lock(rdev, ring, num_loops * 7 + 14);
59662306a36Sopenharmony_ci	if (r) {
59762306a36Sopenharmony_ci		DRM_ERROR("radeon: moving bo (%d).\n", r);
59862306a36Sopenharmony_ci		radeon_sync_free(rdev, &sync, NULL);
59962306a36Sopenharmony_ci		return ERR_PTR(r);
60062306a36Sopenharmony_ci	}
60162306a36Sopenharmony_ci
60262306a36Sopenharmony_ci	radeon_sync_resv(rdev, &sync, resv, false);
60362306a36Sopenharmony_ci	radeon_sync_rings(rdev, &sync, ring->idx);
60462306a36Sopenharmony_ci
60562306a36Sopenharmony_ci	for (i = 0; i < num_loops; i++) {
60662306a36Sopenharmony_ci		cur_size_in_bytes = size_in_bytes;
60762306a36Sopenharmony_ci		if (cur_size_in_bytes > 0x1fffff)
60862306a36Sopenharmony_ci			cur_size_in_bytes = 0x1fffff;
60962306a36Sopenharmony_ci		size_in_bytes -= cur_size_in_bytes;
61062306a36Sopenharmony_ci		radeon_ring_write(ring, SDMA_PACKET(SDMA_OPCODE_COPY, SDMA_COPY_SUB_OPCODE_LINEAR, 0));
61162306a36Sopenharmony_ci		radeon_ring_write(ring, cur_size_in_bytes);
61262306a36Sopenharmony_ci		radeon_ring_write(ring, 0); /* src/dst endian swap */
61362306a36Sopenharmony_ci		radeon_ring_write(ring, lower_32_bits(src_offset));
61462306a36Sopenharmony_ci		radeon_ring_write(ring, upper_32_bits(src_offset));
61562306a36Sopenharmony_ci		radeon_ring_write(ring, lower_32_bits(dst_offset));
61662306a36Sopenharmony_ci		radeon_ring_write(ring, upper_32_bits(dst_offset));
61762306a36Sopenharmony_ci		src_offset += cur_size_in_bytes;
61862306a36Sopenharmony_ci		dst_offset += cur_size_in_bytes;
61962306a36Sopenharmony_ci	}
62062306a36Sopenharmony_ci
62162306a36Sopenharmony_ci	r = radeon_fence_emit(rdev, &fence, ring->idx);
62262306a36Sopenharmony_ci	if (r) {
62362306a36Sopenharmony_ci		radeon_ring_unlock_undo(rdev, ring);
62462306a36Sopenharmony_ci		radeon_sync_free(rdev, &sync, NULL);
62562306a36Sopenharmony_ci		return ERR_PTR(r);
62662306a36Sopenharmony_ci	}
62762306a36Sopenharmony_ci
62862306a36Sopenharmony_ci	radeon_ring_unlock_commit(rdev, ring, false);
62962306a36Sopenharmony_ci	radeon_sync_free(rdev, &sync, fence);
63062306a36Sopenharmony_ci
63162306a36Sopenharmony_ci	return fence;
63262306a36Sopenharmony_ci}
63362306a36Sopenharmony_ci
63462306a36Sopenharmony_ci/**
63562306a36Sopenharmony_ci * cik_sdma_ring_test - simple async dma engine test
63662306a36Sopenharmony_ci *
63762306a36Sopenharmony_ci * @rdev: radeon_device pointer
63862306a36Sopenharmony_ci * @ring: radeon_ring structure holding ring information
63962306a36Sopenharmony_ci *
64062306a36Sopenharmony_ci * Test the DMA engine by writing using it to write an
64162306a36Sopenharmony_ci * value to memory. (CIK).
64262306a36Sopenharmony_ci * Returns 0 for success, error for failure.
64362306a36Sopenharmony_ci */
64462306a36Sopenharmony_ciint cik_sdma_ring_test(struct radeon_device *rdev,
64562306a36Sopenharmony_ci		       struct radeon_ring *ring)
64662306a36Sopenharmony_ci{
64762306a36Sopenharmony_ci	unsigned i;
64862306a36Sopenharmony_ci	int r;
64962306a36Sopenharmony_ci	unsigned index;
65062306a36Sopenharmony_ci	u32 tmp;
65162306a36Sopenharmony_ci	u64 gpu_addr;
65262306a36Sopenharmony_ci
65362306a36Sopenharmony_ci	if (ring->idx == R600_RING_TYPE_DMA_INDEX)
65462306a36Sopenharmony_ci		index = R600_WB_DMA_RING_TEST_OFFSET;
65562306a36Sopenharmony_ci	else
65662306a36Sopenharmony_ci		index = CAYMAN_WB_DMA1_RING_TEST_OFFSET;
65762306a36Sopenharmony_ci
65862306a36Sopenharmony_ci	gpu_addr = rdev->wb.gpu_addr + index;
65962306a36Sopenharmony_ci
66062306a36Sopenharmony_ci	tmp = 0xCAFEDEAD;
66162306a36Sopenharmony_ci	rdev->wb.wb[index/4] = cpu_to_le32(tmp);
66262306a36Sopenharmony_ci
66362306a36Sopenharmony_ci	r = radeon_ring_lock(rdev, ring, 5);
66462306a36Sopenharmony_ci	if (r) {
66562306a36Sopenharmony_ci		DRM_ERROR("radeon: dma failed to lock ring %d (%d).\n", ring->idx, r);
66662306a36Sopenharmony_ci		return r;
66762306a36Sopenharmony_ci	}
66862306a36Sopenharmony_ci	radeon_ring_write(ring, SDMA_PACKET(SDMA_OPCODE_WRITE, SDMA_WRITE_SUB_OPCODE_LINEAR, 0));
66962306a36Sopenharmony_ci	radeon_ring_write(ring, lower_32_bits(gpu_addr));
67062306a36Sopenharmony_ci	radeon_ring_write(ring, upper_32_bits(gpu_addr));
67162306a36Sopenharmony_ci	radeon_ring_write(ring, 1); /* number of DWs to follow */
67262306a36Sopenharmony_ci	radeon_ring_write(ring, 0xDEADBEEF);
67362306a36Sopenharmony_ci	radeon_ring_unlock_commit(rdev, ring, false);
67462306a36Sopenharmony_ci
67562306a36Sopenharmony_ci	for (i = 0; i < rdev->usec_timeout; i++) {
67662306a36Sopenharmony_ci		tmp = le32_to_cpu(rdev->wb.wb[index/4]);
67762306a36Sopenharmony_ci		if (tmp == 0xDEADBEEF)
67862306a36Sopenharmony_ci			break;
67962306a36Sopenharmony_ci		udelay(1);
68062306a36Sopenharmony_ci	}
68162306a36Sopenharmony_ci
68262306a36Sopenharmony_ci	if (i < rdev->usec_timeout) {
68362306a36Sopenharmony_ci		DRM_INFO("ring test on %d succeeded in %d usecs\n", ring->idx, i);
68462306a36Sopenharmony_ci	} else {
68562306a36Sopenharmony_ci		DRM_ERROR("radeon: ring %d test failed (0x%08X)\n",
68662306a36Sopenharmony_ci			  ring->idx, tmp);
68762306a36Sopenharmony_ci		r = -EINVAL;
68862306a36Sopenharmony_ci	}
68962306a36Sopenharmony_ci	return r;
69062306a36Sopenharmony_ci}
69162306a36Sopenharmony_ci
69262306a36Sopenharmony_ci/**
69362306a36Sopenharmony_ci * cik_sdma_ib_test - test an IB on the DMA engine
69462306a36Sopenharmony_ci *
69562306a36Sopenharmony_ci * @rdev: radeon_device pointer
69662306a36Sopenharmony_ci * @ring: radeon_ring structure holding ring information
69762306a36Sopenharmony_ci *
69862306a36Sopenharmony_ci * Test a simple IB in the DMA ring (CIK).
69962306a36Sopenharmony_ci * Returns 0 on success, error on failure.
70062306a36Sopenharmony_ci */
70162306a36Sopenharmony_ciint cik_sdma_ib_test(struct radeon_device *rdev, struct radeon_ring *ring)
70262306a36Sopenharmony_ci{
70362306a36Sopenharmony_ci	struct radeon_ib ib;
70462306a36Sopenharmony_ci	unsigned i;
70562306a36Sopenharmony_ci	unsigned index;
70662306a36Sopenharmony_ci	int r;
70762306a36Sopenharmony_ci	u32 tmp = 0;
70862306a36Sopenharmony_ci	u64 gpu_addr;
70962306a36Sopenharmony_ci
71062306a36Sopenharmony_ci	if (ring->idx == R600_RING_TYPE_DMA_INDEX)
71162306a36Sopenharmony_ci		index = R600_WB_DMA_RING_TEST_OFFSET;
71262306a36Sopenharmony_ci	else
71362306a36Sopenharmony_ci		index = CAYMAN_WB_DMA1_RING_TEST_OFFSET;
71462306a36Sopenharmony_ci
71562306a36Sopenharmony_ci	gpu_addr = rdev->wb.gpu_addr + index;
71662306a36Sopenharmony_ci
71762306a36Sopenharmony_ci	tmp = 0xCAFEDEAD;
71862306a36Sopenharmony_ci	rdev->wb.wb[index/4] = cpu_to_le32(tmp);
71962306a36Sopenharmony_ci
72062306a36Sopenharmony_ci	r = radeon_ib_get(rdev, ring->idx, &ib, NULL, 256);
72162306a36Sopenharmony_ci	if (r) {
72262306a36Sopenharmony_ci		DRM_ERROR("radeon: failed to get ib (%d).\n", r);
72362306a36Sopenharmony_ci		return r;
72462306a36Sopenharmony_ci	}
72562306a36Sopenharmony_ci
72662306a36Sopenharmony_ci	ib.ptr[0] = SDMA_PACKET(SDMA_OPCODE_WRITE, SDMA_WRITE_SUB_OPCODE_LINEAR, 0);
72762306a36Sopenharmony_ci	ib.ptr[1] = lower_32_bits(gpu_addr);
72862306a36Sopenharmony_ci	ib.ptr[2] = upper_32_bits(gpu_addr);
72962306a36Sopenharmony_ci	ib.ptr[3] = 1;
73062306a36Sopenharmony_ci	ib.ptr[4] = 0xDEADBEEF;
73162306a36Sopenharmony_ci	ib.length_dw = 5;
73262306a36Sopenharmony_ci
73362306a36Sopenharmony_ci	r = radeon_ib_schedule(rdev, &ib, NULL, false);
73462306a36Sopenharmony_ci	if (r) {
73562306a36Sopenharmony_ci		radeon_ib_free(rdev, &ib);
73662306a36Sopenharmony_ci		DRM_ERROR("radeon: failed to schedule ib (%d).\n", r);
73762306a36Sopenharmony_ci		return r;
73862306a36Sopenharmony_ci	}
73962306a36Sopenharmony_ci	r = radeon_fence_wait_timeout(ib.fence, false, usecs_to_jiffies(
74062306a36Sopenharmony_ci		RADEON_USEC_IB_TEST_TIMEOUT));
74162306a36Sopenharmony_ci	if (r < 0) {
74262306a36Sopenharmony_ci		DRM_ERROR("radeon: fence wait failed (%d).\n", r);
74362306a36Sopenharmony_ci		return r;
74462306a36Sopenharmony_ci	} else if (r == 0) {
74562306a36Sopenharmony_ci		DRM_ERROR("radeon: fence wait timed out.\n");
74662306a36Sopenharmony_ci		return -ETIMEDOUT;
74762306a36Sopenharmony_ci	}
74862306a36Sopenharmony_ci	r = 0;
74962306a36Sopenharmony_ci	for (i = 0; i < rdev->usec_timeout; i++) {
75062306a36Sopenharmony_ci		tmp = le32_to_cpu(rdev->wb.wb[index/4]);
75162306a36Sopenharmony_ci		if (tmp == 0xDEADBEEF)
75262306a36Sopenharmony_ci			break;
75362306a36Sopenharmony_ci		udelay(1);
75462306a36Sopenharmony_ci	}
75562306a36Sopenharmony_ci	if (i < rdev->usec_timeout) {
75662306a36Sopenharmony_ci		DRM_INFO("ib test on ring %d succeeded in %u usecs\n", ib.fence->ring, i);
75762306a36Sopenharmony_ci	} else {
75862306a36Sopenharmony_ci		DRM_ERROR("radeon: ib test failed (0x%08X)\n", tmp);
75962306a36Sopenharmony_ci		r = -EINVAL;
76062306a36Sopenharmony_ci	}
76162306a36Sopenharmony_ci	radeon_ib_free(rdev, &ib);
76262306a36Sopenharmony_ci	return r;
76362306a36Sopenharmony_ci}
76462306a36Sopenharmony_ci
76562306a36Sopenharmony_ci/**
76662306a36Sopenharmony_ci * cik_sdma_is_lockup - Check if the DMA engine is locked up
76762306a36Sopenharmony_ci *
76862306a36Sopenharmony_ci * @rdev: radeon_device pointer
76962306a36Sopenharmony_ci * @ring: radeon_ring structure holding ring information
77062306a36Sopenharmony_ci *
77162306a36Sopenharmony_ci * Check if the async DMA engine is locked up (CIK).
77262306a36Sopenharmony_ci * Returns true if the engine appears to be locked up, false if not.
77362306a36Sopenharmony_ci */
77462306a36Sopenharmony_cibool cik_sdma_is_lockup(struct radeon_device *rdev, struct radeon_ring *ring)
77562306a36Sopenharmony_ci{
77662306a36Sopenharmony_ci	u32 reset_mask = cik_gpu_check_soft_reset(rdev);
77762306a36Sopenharmony_ci	u32 mask;
77862306a36Sopenharmony_ci
77962306a36Sopenharmony_ci	if (ring->idx == R600_RING_TYPE_DMA_INDEX)
78062306a36Sopenharmony_ci		mask = RADEON_RESET_DMA;
78162306a36Sopenharmony_ci	else
78262306a36Sopenharmony_ci		mask = RADEON_RESET_DMA1;
78362306a36Sopenharmony_ci
78462306a36Sopenharmony_ci	if (!(reset_mask & mask)) {
78562306a36Sopenharmony_ci		radeon_ring_lockup_update(rdev, ring);
78662306a36Sopenharmony_ci		return false;
78762306a36Sopenharmony_ci	}
78862306a36Sopenharmony_ci	return radeon_ring_test_lockup(rdev, ring);
78962306a36Sopenharmony_ci}
79062306a36Sopenharmony_ci
79162306a36Sopenharmony_ci/**
79262306a36Sopenharmony_ci * cik_sdma_vm_copy_pages - update PTEs by copying them from the GART
79362306a36Sopenharmony_ci *
79462306a36Sopenharmony_ci * @rdev: radeon_device pointer
79562306a36Sopenharmony_ci * @ib: indirect buffer to fill with commands
79662306a36Sopenharmony_ci * @pe: addr of the page entry
79762306a36Sopenharmony_ci * @src: src addr to copy from
79862306a36Sopenharmony_ci * @count: number of page entries to update
79962306a36Sopenharmony_ci *
80062306a36Sopenharmony_ci * Update PTEs by copying them from the GART using sDMA (CIK).
80162306a36Sopenharmony_ci */
80262306a36Sopenharmony_civoid cik_sdma_vm_copy_pages(struct radeon_device *rdev,
80362306a36Sopenharmony_ci			    struct radeon_ib *ib,
80462306a36Sopenharmony_ci			    uint64_t pe, uint64_t src,
80562306a36Sopenharmony_ci			    unsigned count)
80662306a36Sopenharmony_ci{
80762306a36Sopenharmony_ci	while (count) {
80862306a36Sopenharmony_ci		unsigned bytes = count * 8;
80962306a36Sopenharmony_ci		if (bytes > 0x1FFFF8)
81062306a36Sopenharmony_ci			bytes = 0x1FFFF8;
81162306a36Sopenharmony_ci
81262306a36Sopenharmony_ci		ib->ptr[ib->length_dw++] = SDMA_PACKET(SDMA_OPCODE_COPY,
81362306a36Sopenharmony_ci			SDMA_WRITE_SUB_OPCODE_LINEAR, 0);
81462306a36Sopenharmony_ci		ib->ptr[ib->length_dw++] = bytes;
81562306a36Sopenharmony_ci		ib->ptr[ib->length_dw++] = 0; /* src/dst endian swap */
81662306a36Sopenharmony_ci		ib->ptr[ib->length_dw++] = lower_32_bits(src);
81762306a36Sopenharmony_ci		ib->ptr[ib->length_dw++] = upper_32_bits(src);
81862306a36Sopenharmony_ci		ib->ptr[ib->length_dw++] = lower_32_bits(pe);
81962306a36Sopenharmony_ci		ib->ptr[ib->length_dw++] = upper_32_bits(pe);
82062306a36Sopenharmony_ci
82162306a36Sopenharmony_ci		pe += bytes;
82262306a36Sopenharmony_ci		src += bytes;
82362306a36Sopenharmony_ci		count -= bytes / 8;
82462306a36Sopenharmony_ci	}
82562306a36Sopenharmony_ci}
82662306a36Sopenharmony_ci
82762306a36Sopenharmony_ci/**
82862306a36Sopenharmony_ci * cik_sdma_vm_write_pages - update PTEs by writing them manually
82962306a36Sopenharmony_ci *
83062306a36Sopenharmony_ci * @rdev: radeon_device pointer
83162306a36Sopenharmony_ci * @ib: indirect buffer to fill with commands
83262306a36Sopenharmony_ci * @pe: addr of the page entry
83362306a36Sopenharmony_ci * @addr: dst addr to write into pe
83462306a36Sopenharmony_ci * @count: number of page entries to update
83562306a36Sopenharmony_ci * @incr: increase next addr by incr bytes
83662306a36Sopenharmony_ci * @flags: access flags
83762306a36Sopenharmony_ci *
83862306a36Sopenharmony_ci * Update PTEs by writing them manually using sDMA (CIK).
83962306a36Sopenharmony_ci */
84062306a36Sopenharmony_civoid cik_sdma_vm_write_pages(struct radeon_device *rdev,
84162306a36Sopenharmony_ci			     struct radeon_ib *ib,
84262306a36Sopenharmony_ci			     uint64_t pe,
84362306a36Sopenharmony_ci			     uint64_t addr, unsigned count,
84462306a36Sopenharmony_ci			     uint32_t incr, uint32_t flags)
84562306a36Sopenharmony_ci{
84662306a36Sopenharmony_ci	uint64_t value;
84762306a36Sopenharmony_ci	unsigned ndw;
84862306a36Sopenharmony_ci
84962306a36Sopenharmony_ci	while (count) {
85062306a36Sopenharmony_ci		ndw = count * 2;
85162306a36Sopenharmony_ci		if (ndw > 0xFFFFE)
85262306a36Sopenharmony_ci			ndw = 0xFFFFE;
85362306a36Sopenharmony_ci
85462306a36Sopenharmony_ci		/* for non-physically contiguous pages (system) */
85562306a36Sopenharmony_ci		ib->ptr[ib->length_dw++] = SDMA_PACKET(SDMA_OPCODE_WRITE,
85662306a36Sopenharmony_ci			SDMA_WRITE_SUB_OPCODE_LINEAR, 0);
85762306a36Sopenharmony_ci		ib->ptr[ib->length_dw++] = pe;
85862306a36Sopenharmony_ci		ib->ptr[ib->length_dw++] = upper_32_bits(pe);
85962306a36Sopenharmony_ci		ib->ptr[ib->length_dw++] = ndw;
86062306a36Sopenharmony_ci		for (; ndw > 0; ndw -= 2, --count, pe += 8) {
86162306a36Sopenharmony_ci			if (flags & R600_PTE_SYSTEM) {
86262306a36Sopenharmony_ci				value = radeon_vm_map_gart(rdev, addr);
86362306a36Sopenharmony_ci			} else if (flags & R600_PTE_VALID) {
86462306a36Sopenharmony_ci				value = addr;
86562306a36Sopenharmony_ci			} else {
86662306a36Sopenharmony_ci				value = 0;
86762306a36Sopenharmony_ci			}
86862306a36Sopenharmony_ci			addr += incr;
86962306a36Sopenharmony_ci			value |= flags;
87062306a36Sopenharmony_ci			ib->ptr[ib->length_dw++] = value;
87162306a36Sopenharmony_ci			ib->ptr[ib->length_dw++] = upper_32_bits(value);
87262306a36Sopenharmony_ci		}
87362306a36Sopenharmony_ci	}
87462306a36Sopenharmony_ci}
87562306a36Sopenharmony_ci
87662306a36Sopenharmony_ci/**
87762306a36Sopenharmony_ci * cik_sdma_vm_set_pages - update the page tables using sDMA
87862306a36Sopenharmony_ci *
87962306a36Sopenharmony_ci * @rdev: radeon_device pointer
88062306a36Sopenharmony_ci * @ib: indirect buffer to fill with commands
88162306a36Sopenharmony_ci * @pe: addr of the page entry
88262306a36Sopenharmony_ci * @addr: dst addr to write into pe
88362306a36Sopenharmony_ci * @count: number of page entries to update
88462306a36Sopenharmony_ci * @incr: increase next addr by incr bytes
88562306a36Sopenharmony_ci * @flags: access flags
88662306a36Sopenharmony_ci *
88762306a36Sopenharmony_ci * Update the page tables using sDMA (CIK).
88862306a36Sopenharmony_ci */
88962306a36Sopenharmony_civoid cik_sdma_vm_set_pages(struct radeon_device *rdev,
89062306a36Sopenharmony_ci			   struct radeon_ib *ib,
89162306a36Sopenharmony_ci			   uint64_t pe,
89262306a36Sopenharmony_ci			   uint64_t addr, unsigned count,
89362306a36Sopenharmony_ci			   uint32_t incr, uint32_t flags)
89462306a36Sopenharmony_ci{
89562306a36Sopenharmony_ci	uint64_t value;
89662306a36Sopenharmony_ci	unsigned ndw;
89762306a36Sopenharmony_ci
89862306a36Sopenharmony_ci	while (count) {
89962306a36Sopenharmony_ci		ndw = count;
90062306a36Sopenharmony_ci		if (ndw > 0x7FFFF)
90162306a36Sopenharmony_ci			ndw = 0x7FFFF;
90262306a36Sopenharmony_ci
90362306a36Sopenharmony_ci		if (flags & R600_PTE_VALID)
90462306a36Sopenharmony_ci			value = addr;
90562306a36Sopenharmony_ci		else
90662306a36Sopenharmony_ci			value = 0;
90762306a36Sopenharmony_ci
90862306a36Sopenharmony_ci		/* for physically contiguous pages (vram) */
90962306a36Sopenharmony_ci		ib->ptr[ib->length_dw++] = SDMA_PACKET(SDMA_OPCODE_GENERATE_PTE_PDE, 0, 0);
91062306a36Sopenharmony_ci		ib->ptr[ib->length_dw++] = pe; /* dst addr */
91162306a36Sopenharmony_ci		ib->ptr[ib->length_dw++] = upper_32_bits(pe);
91262306a36Sopenharmony_ci		ib->ptr[ib->length_dw++] = flags; /* mask */
91362306a36Sopenharmony_ci		ib->ptr[ib->length_dw++] = 0;
91462306a36Sopenharmony_ci		ib->ptr[ib->length_dw++] = value; /* value */
91562306a36Sopenharmony_ci		ib->ptr[ib->length_dw++] = upper_32_bits(value);
91662306a36Sopenharmony_ci		ib->ptr[ib->length_dw++] = incr; /* increment size */
91762306a36Sopenharmony_ci		ib->ptr[ib->length_dw++] = 0;
91862306a36Sopenharmony_ci		ib->ptr[ib->length_dw++] = ndw; /* number of entries */
91962306a36Sopenharmony_ci
92062306a36Sopenharmony_ci		pe += ndw * 8;
92162306a36Sopenharmony_ci		addr += ndw * incr;
92262306a36Sopenharmony_ci		count -= ndw;
92362306a36Sopenharmony_ci	}
92462306a36Sopenharmony_ci}
92562306a36Sopenharmony_ci
92662306a36Sopenharmony_ci/**
92762306a36Sopenharmony_ci * cik_sdma_vm_pad_ib - pad the IB to the required number of dw
92862306a36Sopenharmony_ci *
92962306a36Sopenharmony_ci * @ib: indirect buffer to fill with padding
93062306a36Sopenharmony_ci *
93162306a36Sopenharmony_ci */
93262306a36Sopenharmony_civoid cik_sdma_vm_pad_ib(struct radeon_ib *ib)
93362306a36Sopenharmony_ci{
93462306a36Sopenharmony_ci	while (ib->length_dw & 0x7)
93562306a36Sopenharmony_ci		ib->ptr[ib->length_dw++] = SDMA_PACKET(SDMA_OPCODE_NOP, 0, 0);
93662306a36Sopenharmony_ci}
93762306a36Sopenharmony_ci
93862306a36Sopenharmony_ci/*
93962306a36Sopenharmony_ci * cik_dma_vm_flush - cik vm flush using sDMA
94062306a36Sopenharmony_ci *
94162306a36Sopenharmony_ci * Update the page table base and flush the VM TLB
94262306a36Sopenharmony_ci * using sDMA (CIK).
94362306a36Sopenharmony_ci */
94462306a36Sopenharmony_civoid cik_dma_vm_flush(struct radeon_device *rdev, struct radeon_ring *ring,
94562306a36Sopenharmony_ci		      unsigned vm_id, uint64_t pd_addr)
94662306a36Sopenharmony_ci{
94762306a36Sopenharmony_ci	u32 extra_bits = (SDMA_POLL_REG_MEM_EXTRA_OP(0) |
94862306a36Sopenharmony_ci			  SDMA_POLL_REG_MEM_EXTRA_FUNC(0)); /* always */
94962306a36Sopenharmony_ci
95062306a36Sopenharmony_ci	radeon_ring_write(ring, SDMA_PACKET(SDMA_OPCODE_SRBM_WRITE, 0, 0xf000));
95162306a36Sopenharmony_ci	if (vm_id < 8) {
95262306a36Sopenharmony_ci		radeon_ring_write(ring, (VM_CONTEXT0_PAGE_TABLE_BASE_ADDR + (vm_id << 2)) >> 2);
95362306a36Sopenharmony_ci	} else {
95462306a36Sopenharmony_ci		radeon_ring_write(ring, (VM_CONTEXT8_PAGE_TABLE_BASE_ADDR + ((vm_id - 8) << 2)) >> 2);
95562306a36Sopenharmony_ci	}
95662306a36Sopenharmony_ci	radeon_ring_write(ring, pd_addr >> 12);
95762306a36Sopenharmony_ci
95862306a36Sopenharmony_ci	/* update SH_MEM_* regs */
95962306a36Sopenharmony_ci	radeon_ring_write(ring, SDMA_PACKET(SDMA_OPCODE_SRBM_WRITE, 0, 0xf000));
96062306a36Sopenharmony_ci	radeon_ring_write(ring, SRBM_GFX_CNTL >> 2);
96162306a36Sopenharmony_ci	radeon_ring_write(ring, VMID(vm_id));
96262306a36Sopenharmony_ci
96362306a36Sopenharmony_ci	radeon_ring_write(ring, SDMA_PACKET(SDMA_OPCODE_SRBM_WRITE, 0, 0xf000));
96462306a36Sopenharmony_ci	radeon_ring_write(ring, SH_MEM_BASES >> 2);
96562306a36Sopenharmony_ci	radeon_ring_write(ring, 0);
96662306a36Sopenharmony_ci
96762306a36Sopenharmony_ci	radeon_ring_write(ring, SDMA_PACKET(SDMA_OPCODE_SRBM_WRITE, 0, 0xf000));
96862306a36Sopenharmony_ci	radeon_ring_write(ring, SH_MEM_CONFIG >> 2);
96962306a36Sopenharmony_ci	radeon_ring_write(ring, 0);
97062306a36Sopenharmony_ci
97162306a36Sopenharmony_ci	radeon_ring_write(ring, SDMA_PACKET(SDMA_OPCODE_SRBM_WRITE, 0, 0xf000));
97262306a36Sopenharmony_ci	radeon_ring_write(ring, SH_MEM_APE1_BASE >> 2);
97362306a36Sopenharmony_ci	radeon_ring_write(ring, 1);
97462306a36Sopenharmony_ci
97562306a36Sopenharmony_ci	radeon_ring_write(ring, SDMA_PACKET(SDMA_OPCODE_SRBM_WRITE, 0, 0xf000));
97662306a36Sopenharmony_ci	radeon_ring_write(ring, SH_MEM_APE1_LIMIT >> 2);
97762306a36Sopenharmony_ci	radeon_ring_write(ring, 0);
97862306a36Sopenharmony_ci
97962306a36Sopenharmony_ci	radeon_ring_write(ring, SDMA_PACKET(SDMA_OPCODE_SRBM_WRITE, 0, 0xf000));
98062306a36Sopenharmony_ci	radeon_ring_write(ring, SRBM_GFX_CNTL >> 2);
98162306a36Sopenharmony_ci	radeon_ring_write(ring, VMID(0));
98262306a36Sopenharmony_ci
98362306a36Sopenharmony_ci	/* flush HDP */
98462306a36Sopenharmony_ci	cik_sdma_hdp_flush_ring_emit(rdev, ring->idx);
98562306a36Sopenharmony_ci
98662306a36Sopenharmony_ci	/* flush TLB */
98762306a36Sopenharmony_ci	radeon_ring_write(ring, SDMA_PACKET(SDMA_OPCODE_SRBM_WRITE, 0, 0xf000));
98862306a36Sopenharmony_ci	radeon_ring_write(ring, VM_INVALIDATE_REQUEST >> 2);
98962306a36Sopenharmony_ci	radeon_ring_write(ring, 1 << vm_id);
99062306a36Sopenharmony_ci
99162306a36Sopenharmony_ci	radeon_ring_write(ring, SDMA_PACKET(SDMA_OPCODE_POLL_REG_MEM, 0, extra_bits));
99262306a36Sopenharmony_ci	radeon_ring_write(ring, VM_INVALIDATE_REQUEST >> 2);
99362306a36Sopenharmony_ci	radeon_ring_write(ring, 0);
99462306a36Sopenharmony_ci	radeon_ring_write(ring, 0); /* reference */
99562306a36Sopenharmony_ci	radeon_ring_write(ring, 0); /* mask */
99662306a36Sopenharmony_ci	radeon_ring_write(ring, (0xfff << 16) | 10); /* retry count, poll interval */
99762306a36Sopenharmony_ci}
99862306a36Sopenharmony_ci
999