18c2ecf20Sopenharmony_ci/*
28c2ecf20Sopenharmony_ci * Copyright 2010 Advanced Micro Devices, Inc.
38c2ecf20Sopenharmony_ci *
48c2ecf20Sopenharmony_ci * Permission is hereby granted, free of charge, to any person obtaining a
58c2ecf20Sopenharmony_ci * copy of this software and associated documentation files (the "Software"),
68c2ecf20Sopenharmony_ci * to deal in the Software without restriction, including without limitation
78c2ecf20Sopenharmony_ci * the rights to use, copy, modify, merge, publish, distribute, sublicense,
88c2ecf20Sopenharmony_ci * and/or sell copies of the Software, and to permit persons to whom the
98c2ecf20Sopenharmony_ci * Software is furnished to do so, subject to the following conditions:
108c2ecf20Sopenharmony_ci *
118c2ecf20Sopenharmony_ci * The above copyright notice and this permission notice shall be included in
128c2ecf20Sopenharmony_ci * all copies or substantial portions of the Software.
138c2ecf20Sopenharmony_ci *
148c2ecf20Sopenharmony_ci * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
158c2ecf20Sopenharmony_ci * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
168c2ecf20Sopenharmony_ci * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
178c2ecf20Sopenharmony_ci * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
188c2ecf20Sopenharmony_ci * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
198c2ecf20Sopenharmony_ci * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
208c2ecf20Sopenharmony_ci * OTHER DEALINGS IN THE SOFTWARE.
218c2ecf20Sopenharmony_ci *
228c2ecf20Sopenharmony_ci * Authors: Alex Deucher
238c2ecf20Sopenharmony_ci */
248c2ecf20Sopenharmony_ci
258c2ecf20Sopenharmony_ci#include "radeon.h"
268c2ecf20Sopenharmony_ci#include "radeon_asic.h"
278c2ecf20Sopenharmony_ci#include "radeon_trace.h"
288c2ecf20Sopenharmony_ci#include "nid.h"
298c2ecf20Sopenharmony_ci
308c2ecf20Sopenharmony_ciu32 cayman_gpu_check_soft_reset(struct radeon_device *rdev);
318c2ecf20Sopenharmony_ci
/*
 * DMA
 * Starting with R600, the GPU has an asynchronous
 * DMA engine.  The programming model is very similar
 * to the 3D engine (ring buffer, IBs, etc.), but the
 * DMA controller has its own packet format that is
 * different from the PM4 format used by the 3D engine.
 * It supports copying data, writing embedded data,
 * solid fills, and a number of other things.  It also
 * has support for tiling/detiling of buffers.
 * Cayman and newer support two asynchronous DMA engines.
 */
448c2ecf20Sopenharmony_ci
458c2ecf20Sopenharmony_ci/**
468c2ecf20Sopenharmony_ci * cayman_dma_get_rptr - get the current read pointer
478c2ecf20Sopenharmony_ci *
488c2ecf20Sopenharmony_ci * @rdev: radeon_device pointer
498c2ecf20Sopenharmony_ci * @ring: radeon ring pointer
508c2ecf20Sopenharmony_ci *
518c2ecf20Sopenharmony_ci * Get the current rptr from the hardware (cayman+).
528c2ecf20Sopenharmony_ci */
538c2ecf20Sopenharmony_ciuint32_t cayman_dma_get_rptr(struct radeon_device *rdev,
548c2ecf20Sopenharmony_ci			     struct radeon_ring *ring)
558c2ecf20Sopenharmony_ci{
568c2ecf20Sopenharmony_ci	u32 rptr, reg;
578c2ecf20Sopenharmony_ci
588c2ecf20Sopenharmony_ci	if (rdev->wb.enabled) {
598c2ecf20Sopenharmony_ci		rptr = rdev->wb.wb[ring->rptr_offs/4];
608c2ecf20Sopenharmony_ci	} else {
618c2ecf20Sopenharmony_ci		if (ring->idx == R600_RING_TYPE_DMA_INDEX)
628c2ecf20Sopenharmony_ci			reg = DMA_RB_RPTR + DMA0_REGISTER_OFFSET;
638c2ecf20Sopenharmony_ci		else
648c2ecf20Sopenharmony_ci			reg = DMA_RB_RPTR + DMA1_REGISTER_OFFSET;
658c2ecf20Sopenharmony_ci
668c2ecf20Sopenharmony_ci		rptr = RREG32(reg);
678c2ecf20Sopenharmony_ci	}
688c2ecf20Sopenharmony_ci
698c2ecf20Sopenharmony_ci	return (rptr & 0x3fffc) >> 2;
708c2ecf20Sopenharmony_ci}
718c2ecf20Sopenharmony_ci
728c2ecf20Sopenharmony_ci/**
738c2ecf20Sopenharmony_ci * cayman_dma_get_wptr - get the current write pointer
748c2ecf20Sopenharmony_ci *
758c2ecf20Sopenharmony_ci * @rdev: radeon_device pointer
768c2ecf20Sopenharmony_ci * @ring: radeon ring pointer
778c2ecf20Sopenharmony_ci *
788c2ecf20Sopenharmony_ci * Get the current wptr from the hardware (cayman+).
798c2ecf20Sopenharmony_ci */
808c2ecf20Sopenharmony_ciuint32_t cayman_dma_get_wptr(struct radeon_device *rdev,
818c2ecf20Sopenharmony_ci			   struct radeon_ring *ring)
828c2ecf20Sopenharmony_ci{
838c2ecf20Sopenharmony_ci	u32 reg;
848c2ecf20Sopenharmony_ci
858c2ecf20Sopenharmony_ci	if (ring->idx == R600_RING_TYPE_DMA_INDEX)
868c2ecf20Sopenharmony_ci		reg = DMA_RB_WPTR + DMA0_REGISTER_OFFSET;
878c2ecf20Sopenharmony_ci	else
888c2ecf20Sopenharmony_ci		reg = DMA_RB_WPTR + DMA1_REGISTER_OFFSET;
898c2ecf20Sopenharmony_ci
908c2ecf20Sopenharmony_ci	return (RREG32(reg) & 0x3fffc) >> 2;
918c2ecf20Sopenharmony_ci}
928c2ecf20Sopenharmony_ci
938c2ecf20Sopenharmony_ci/**
948c2ecf20Sopenharmony_ci * cayman_dma_set_wptr - commit the write pointer
958c2ecf20Sopenharmony_ci *
968c2ecf20Sopenharmony_ci * @rdev: radeon_device pointer
978c2ecf20Sopenharmony_ci * @ring: radeon ring pointer
988c2ecf20Sopenharmony_ci *
998c2ecf20Sopenharmony_ci * Write the wptr back to the hardware (cayman+).
1008c2ecf20Sopenharmony_ci */
1018c2ecf20Sopenharmony_civoid cayman_dma_set_wptr(struct radeon_device *rdev,
1028c2ecf20Sopenharmony_ci			 struct radeon_ring *ring)
1038c2ecf20Sopenharmony_ci{
1048c2ecf20Sopenharmony_ci	u32 reg;
1058c2ecf20Sopenharmony_ci
1068c2ecf20Sopenharmony_ci	if (ring->idx == R600_RING_TYPE_DMA_INDEX)
1078c2ecf20Sopenharmony_ci		reg = DMA_RB_WPTR + DMA0_REGISTER_OFFSET;
1088c2ecf20Sopenharmony_ci	else
1098c2ecf20Sopenharmony_ci		reg = DMA_RB_WPTR + DMA1_REGISTER_OFFSET;
1108c2ecf20Sopenharmony_ci
1118c2ecf20Sopenharmony_ci	WREG32(reg, (ring->wptr << 2) & 0x3fffc);
1128c2ecf20Sopenharmony_ci}
1138c2ecf20Sopenharmony_ci
1148c2ecf20Sopenharmony_ci/**
1158c2ecf20Sopenharmony_ci * cayman_dma_ring_ib_execute - Schedule an IB on the DMA engine
1168c2ecf20Sopenharmony_ci *
1178c2ecf20Sopenharmony_ci * @rdev: radeon_device pointer
1188c2ecf20Sopenharmony_ci * @ib: IB object to schedule
1198c2ecf20Sopenharmony_ci *
1208c2ecf20Sopenharmony_ci * Schedule an IB in the DMA ring (cayman-SI).
1218c2ecf20Sopenharmony_ci */
1228c2ecf20Sopenharmony_civoid cayman_dma_ring_ib_execute(struct radeon_device *rdev,
1238c2ecf20Sopenharmony_ci				struct radeon_ib *ib)
1248c2ecf20Sopenharmony_ci{
1258c2ecf20Sopenharmony_ci	struct radeon_ring *ring = &rdev->ring[ib->ring];
1268c2ecf20Sopenharmony_ci	unsigned vm_id = ib->vm ? ib->vm->ids[ib->ring].id : 0;
1278c2ecf20Sopenharmony_ci
1288c2ecf20Sopenharmony_ci	if (rdev->wb.enabled) {
1298c2ecf20Sopenharmony_ci		u32 next_rptr = ring->wptr + 4;
1308c2ecf20Sopenharmony_ci		while ((next_rptr & 7) != 5)
1318c2ecf20Sopenharmony_ci			next_rptr++;
1328c2ecf20Sopenharmony_ci		next_rptr += 3;
1338c2ecf20Sopenharmony_ci		radeon_ring_write(ring, DMA_PACKET(DMA_PACKET_WRITE, 0, 0, 1));
1348c2ecf20Sopenharmony_ci		radeon_ring_write(ring, ring->next_rptr_gpu_addr & 0xfffffffc);
1358c2ecf20Sopenharmony_ci		radeon_ring_write(ring, upper_32_bits(ring->next_rptr_gpu_addr) & 0xff);
1368c2ecf20Sopenharmony_ci		radeon_ring_write(ring, next_rptr);
1378c2ecf20Sopenharmony_ci	}
1388c2ecf20Sopenharmony_ci
1398c2ecf20Sopenharmony_ci	/* The indirect buffer packet must end on an 8 DW boundary in the DMA ring.
1408c2ecf20Sopenharmony_ci	 * Pad as necessary with NOPs.
1418c2ecf20Sopenharmony_ci	 */
1428c2ecf20Sopenharmony_ci	while ((ring->wptr & 7) != 5)
1438c2ecf20Sopenharmony_ci		radeon_ring_write(ring, DMA_PACKET(DMA_PACKET_NOP, 0, 0, 0));
1448c2ecf20Sopenharmony_ci	radeon_ring_write(ring, DMA_IB_PACKET(DMA_PACKET_INDIRECT_BUFFER, vm_id, 0));
1458c2ecf20Sopenharmony_ci	radeon_ring_write(ring, (ib->gpu_addr & 0xFFFFFFE0));
1468c2ecf20Sopenharmony_ci	radeon_ring_write(ring, (ib->length_dw << 12) | (upper_32_bits(ib->gpu_addr) & 0xFF));
1478c2ecf20Sopenharmony_ci
1488c2ecf20Sopenharmony_ci}
1498c2ecf20Sopenharmony_ci
1508c2ecf20Sopenharmony_ci/**
1518c2ecf20Sopenharmony_ci * cayman_dma_stop - stop the async dma engines
1528c2ecf20Sopenharmony_ci *
1538c2ecf20Sopenharmony_ci * @rdev: radeon_device pointer
1548c2ecf20Sopenharmony_ci *
1558c2ecf20Sopenharmony_ci * Stop the async dma engines (cayman-SI).
1568c2ecf20Sopenharmony_ci */
1578c2ecf20Sopenharmony_civoid cayman_dma_stop(struct radeon_device *rdev)
1588c2ecf20Sopenharmony_ci{
1598c2ecf20Sopenharmony_ci	u32 rb_cntl;
1608c2ecf20Sopenharmony_ci
1618c2ecf20Sopenharmony_ci	if ((rdev->asic->copy.copy_ring_index == R600_RING_TYPE_DMA_INDEX) ||
1628c2ecf20Sopenharmony_ci	    (rdev->asic->copy.copy_ring_index == CAYMAN_RING_TYPE_DMA1_INDEX))
1638c2ecf20Sopenharmony_ci		radeon_ttm_set_active_vram_size(rdev, rdev->mc.visible_vram_size);
1648c2ecf20Sopenharmony_ci
1658c2ecf20Sopenharmony_ci	/* dma0 */
1668c2ecf20Sopenharmony_ci	rb_cntl = RREG32(DMA_RB_CNTL + DMA0_REGISTER_OFFSET);
1678c2ecf20Sopenharmony_ci	rb_cntl &= ~DMA_RB_ENABLE;
1688c2ecf20Sopenharmony_ci	WREG32(DMA_RB_CNTL + DMA0_REGISTER_OFFSET, rb_cntl);
1698c2ecf20Sopenharmony_ci
1708c2ecf20Sopenharmony_ci	/* dma1 */
1718c2ecf20Sopenharmony_ci	rb_cntl = RREG32(DMA_RB_CNTL + DMA1_REGISTER_OFFSET);
1728c2ecf20Sopenharmony_ci	rb_cntl &= ~DMA_RB_ENABLE;
1738c2ecf20Sopenharmony_ci	WREG32(DMA_RB_CNTL + DMA1_REGISTER_OFFSET, rb_cntl);
1748c2ecf20Sopenharmony_ci
1758c2ecf20Sopenharmony_ci	rdev->ring[R600_RING_TYPE_DMA_INDEX].ready = false;
1768c2ecf20Sopenharmony_ci	rdev->ring[CAYMAN_RING_TYPE_DMA1_INDEX].ready = false;
1778c2ecf20Sopenharmony_ci}
1788c2ecf20Sopenharmony_ci
1798c2ecf20Sopenharmony_ci/**
1808c2ecf20Sopenharmony_ci * cayman_dma_resume - setup and start the async dma engines
1818c2ecf20Sopenharmony_ci *
1828c2ecf20Sopenharmony_ci * @rdev: radeon_device pointer
1838c2ecf20Sopenharmony_ci *
1848c2ecf20Sopenharmony_ci * Set up the DMA ring buffers and enable them. (cayman-SI).
1858c2ecf20Sopenharmony_ci * Returns 0 for success, error for failure.
1868c2ecf20Sopenharmony_ci */
1878c2ecf20Sopenharmony_ciint cayman_dma_resume(struct radeon_device *rdev)
1888c2ecf20Sopenharmony_ci{
1898c2ecf20Sopenharmony_ci	struct radeon_ring *ring;
1908c2ecf20Sopenharmony_ci	u32 rb_cntl, dma_cntl, ib_cntl;
1918c2ecf20Sopenharmony_ci	u32 rb_bufsz;
1928c2ecf20Sopenharmony_ci	u32 reg_offset, wb_offset;
1938c2ecf20Sopenharmony_ci	int i, r;
1948c2ecf20Sopenharmony_ci
1958c2ecf20Sopenharmony_ci	for (i = 0; i < 2; i++) {
1968c2ecf20Sopenharmony_ci		if (i == 0) {
1978c2ecf20Sopenharmony_ci			ring = &rdev->ring[R600_RING_TYPE_DMA_INDEX];
1988c2ecf20Sopenharmony_ci			reg_offset = DMA0_REGISTER_OFFSET;
1998c2ecf20Sopenharmony_ci			wb_offset = R600_WB_DMA_RPTR_OFFSET;
2008c2ecf20Sopenharmony_ci		} else {
2018c2ecf20Sopenharmony_ci			ring = &rdev->ring[CAYMAN_RING_TYPE_DMA1_INDEX];
2028c2ecf20Sopenharmony_ci			reg_offset = DMA1_REGISTER_OFFSET;
2038c2ecf20Sopenharmony_ci			wb_offset = CAYMAN_WB_DMA1_RPTR_OFFSET;
2048c2ecf20Sopenharmony_ci		}
2058c2ecf20Sopenharmony_ci
2068c2ecf20Sopenharmony_ci		WREG32(DMA_SEM_INCOMPLETE_TIMER_CNTL + reg_offset, 0);
2078c2ecf20Sopenharmony_ci		WREG32(DMA_SEM_WAIT_FAIL_TIMER_CNTL + reg_offset, 0);
2088c2ecf20Sopenharmony_ci
2098c2ecf20Sopenharmony_ci		/* Set ring buffer size in dwords */
2108c2ecf20Sopenharmony_ci		rb_bufsz = order_base_2(ring->ring_size / 4);
2118c2ecf20Sopenharmony_ci		rb_cntl = rb_bufsz << 1;
2128c2ecf20Sopenharmony_ci#ifdef __BIG_ENDIAN
2138c2ecf20Sopenharmony_ci		rb_cntl |= DMA_RB_SWAP_ENABLE | DMA_RPTR_WRITEBACK_SWAP_ENABLE;
2148c2ecf20Sopenharmony_ci#endif
2158c2ecf20Sopenharmony_ci		WREG32(DMA_RB_CNTL + reg_offset, rb_cntl);
2168c2ecf20Sopenharmony_ci
2178c2ecf20Sopenharmony_ci		/* Initialize the ring buffer's read and write pointers */
2188c2ecf20Sopenharmony_ci		WREG32(DMA_RB_RPTR + reg_offset, 0);
2198c2ecf20Sopenharmony_ci		WREG32(DMA_RB_WPTR + reg_offset, 0);
2208c2ecf20Sopenharmony_ci
2218c2ecf20Sopenharmony_ci		/* set the wb address whether it's enabled or not */
2228c2ecf20Sopenharmony_ci		WREG32(DMA_RB_RPTR_ADDR_HI + reg_offset,
2238c2ecf20Sopenharmony_ci		       upper_32_bits(rdev->wb.gpu_addr + wb_offset) & 0xFF);
2248c2ecf20Sopenharmony_ci		WREG32(DMA_RB_RPTR_ADDR_LO + reg_offset,
2258c2ecf20Sopenharmony_ci		       ((rdev->wb.gpu_addr + wb_offset) & 0xFFFFFFFC));
2268c2ecf20Sopenharmony_ci
2278c2ecf20Sopenharmony_ci		if (rdev->wb.enabled)
2288c2ecf20Sopenharmony_ci			rb_cntl |= DMA_RPTR_WRITEBACK_ENABLE;
2298c2ecf20Sopenharmony_ci
2308c2ecf20Sopenharmony_ci		WREG32(DMA_RB_BASE + reg_offset, ring->gpu_addr >> 8);
2318c2ecf20Sopenharmony_ci
2328c2ecf20Sopenharmony_ci		/* enable DMA IBs */
2338c2ecf20Sopenharmony_ci		ib_cntl = DMA_IB_ENABLE | CMD_VMID_FORCE;
2348c2ecf20Sopenharmony_ci#ifdef __BIG_ENDIAN
2358c2ecf20Sopenharmony_ci		ib_cntl |= DMA_IB_SWAP_ENABLE;
2368c2ecf20Sopenharmony_ci#endif
2378c2ecf20Sopenharmony_ci		WREG32(DMA_IB_CNTL + reg_offset, ib_cntl);
2388c2ecf20Sopenharmony_ci
2398c2ecf20Sopenharmony_ci		dma_cntl = RREG32(DMA_CNTL + reg_offset);
2408c2ecf20Sopenharmony_ci		dma_cntl &= ~CTXEMPTY_INT_ENABLE;
2418c2ecf20Sopenharmony_ci		WREG32(DMA_CNTL + reg_offset, dma_cntl);
2428c2ecf20Sopenharmony_ci
2438c2ecf20Sopenharmony_ci		ring->wptr = 0;
2448c2ecf20Sopenharmony_ci		WREG32(DMA_RB_WPTR + reg_offset, ring->wptr << 2);
2458c2ecf20Sopenharmony_ci
2468c2ecf20Sopenharmony_ci		WREG32(DMA_RB_CNTL + reg_offset, rb_cntl | DMA_RB_ENABLE);
2478c2ecf20Sopenharmony_ci
2488c2ecf20Sopenharmony_ci		ring->ready = true;
2498c2ecf20Sopenharmony_ci
2508c2ecf20Sopenharmony_ci		r = radeon_ring_test(rdev, ring->idx, ring);
2518c2ecf20Sopenharmony_ci		if (r) {
2528c2ecf20Sopenharmony_ci			ring->ready = false;
2538c2ecf20Sopenharmony_ci			return r;
2548c2ecf20Sopenharmony_ci		}
2558c2ecf20Sopenharmony_ci	}
2568c2ecf20Sopenharmony_ci
2578c2ecf20Sopenharmony_ci	if ((rdev->asic->copy.copy_ring_index == R600_RING_TYPE_DMA_INDEX) ||
2588c2ecf20Sopenharmony_ci	    (rdev->asic->copy.copy_ring_index == CAYMAN_RING_TYPE_DMA1_INDEX))
2598c2ecf20Sopenharmony_ci		radeon_ttm_set_active_vram_size(rdev, rdev->mc.real_vram_size);
2608c2ecf20Sopenharmony_ci
2618c2ecf20Sopenharmony_ci	return 0;
2628c2ecf20Sopenharmony_ci}
2638c2ecf20Sopenharmony_ci
2648c2ecf20Sopenharmony_ci/**
2658c2ecf20Sopenharmony_ci * cayman_dma_fini - tear down the async dma engines
2668c2ecf20Sopenharmony_ci *
2678c2ecf20Sopenharmony_ci * @rdev: radeon_device pointer
2688c2ecf20Sopenharmony_ci *
2698c2ecf20Sopenharmony_ci * Stop the async dma engines and free the rings (cayman-SI).
2708c2ecf20Sopenharmony_ci */
2718c2ecf20Sopenharmony_civoid cayman_dma_fini(struct radeon_device *rdev)
2728c2ecf20Sopenharmony_ci{
2738c2ecf20Sopenharmony_ci	cayman_dma_stop(rdev);
2748c2ecf20Sopenharmony_ci	radeon_ring_fini(rdev, &rdev->ring[R600_RING_TYPE_DMA_INDEX]);
2758c2ecf20Sopenharmony_ci	radeon_ring_fini(rdev, &rdev->ring[CAYMAN_RING_TYPE_DMA1_INDEX]);
2768c2ecf20Sopenharmony_ci}
2778c2ecf20Sopenharmony_ci
2788c2ecf20Sopenharmony_ci/**
2798c2ecf20Sopenharmony_ci * cayman_dma_is_lockup - Check if the DMA engine is locked up
2808c2ecf20Sopenharmony_ci *
2818c2ecf20Sopenharmony_ci * @rdev: radeon_device pointer
2828c2ecf20Sopenharmony_ci * @ring: radeon_ring structure holding ring information
2838c2ecf20Sopenharmony_ci *
2848c2ecf20Sopenharmony_ci * Check if the async DMA engine is locked up.
2858c2ecf20Sopenharmony_ci * Returns true if the engine appears to be locked up, false if not.
2868c2ecf20Sopenharmony_ci */
2878c2ecf20Sopenharmony_cibool cayman_dma_is_lockup(struct radeon_device *rdev, struct radeon_ring *ring)
2888c2ecf20Sopenharmony_ci{
2898c2ecf20Sopenharmony_ci	u32 reset_mask = cayman_gpu_check_soft_reset(rdev);
2908c2ecf20Sopenharmony_ci	u32 mask;
2918c2ecf20Sopenharmony_ci
2928c2ecf20Sopenharmony_ci	if (ring->idx == R600_RING_TYPE_DMA_INDEX)
2938c2ecf20Sopenharmony_ci		mask = RADEON_RESET_DMA;
2948c2ecf20Sopenharmony_ci	else
2958c2ecf20Sopenharmony_ci		mask = RADEON_RESET_DMA1;
2968c2ecf20Sopenharmony_ci
2978c2ecf20Sopenharmony_ci	if (!(reset_mask & mask)) {
2988c2ecf20Sopenharmony_ci		radeon_ring_lockup_update(rdev, ring);
2998c2ecf20Sopenharmony_ci		return false;
3008c2ecf20Sopenharmony_ci	}
3018c2ecf20Sopenharmony_ci	return radeon_ring_test_lockup(rdev, ring);
3028c2ecf20Sopenharmony_ci}
3038c2ecf20Sopenharmony_ci
3048c2ecf20Sopenharmony_ci/**
3058c2ecf20Sopenharmony_ci * cayman_dma_vm_copy_pages - update PTEs by copying them from the GART
3068c2ecf20Sopenharmony_ci *
3078c2ecf20Sopenharmony_ci * @rdev: radeon_device pointer
3088c2ecf20Sopenharmony_ci * @ib: indirect buffer to fill with commands
3098c2ecf20Sopenharmony_ci * @pe: addr of the page entry
3108c2ecf20Sopenharmony_ci * @src: src addr where to copy from
3118c2ecf20Sopenharmony_ci * @count: number of page entries to update
3128c2ecf20Sopenharmony_ci *
3138c2ecf20Sopenharmony_ci * Update PTEs by copying them from the GART using the DMA (cayman/TN).
3148c2ecf20Sopenharmony_ci */
3158c2ecf20Sopenharmony_civoid cayman_dma_vm_copy_pages(struct radeon_device *rdev,
3168c2ecf20Sopenharmony_ci			      struct radeon_ib *ib,
3178c2ecf20Sopenharmony_ci			      uint64_t pe, uint64_t src,
3188c2ecf20Sopenharmony_ci			      unsigned count)
3198c2ecf20Sopenharmony_ci{
3208c2ecf20Sopenharmony_ci	unsigned ndw;
3218c2ecf20Sopenharmony_ci
3228c2ecf20Sopenharmony_ci	while (count) {
3238c2ecf20Sopenharmony_ci		ndw = count * 2;
3248c2ecf20Sopenharmony_ci		if (ndw > 0xFFFFE)
3258c2ecf20Sopenharmony_ci			ndw = 0xFFFFE;
3268c2ecf20Sopenharmony_ci
3278c2ecf20Sopenharmony_ci		ib->ptr[ib->length_dw++] = DMA_PACKET(DMA_PACKET_COPY,
3288c2ecf20Sopenharmony_ci						      0, 0, ndw);
3298c2ecf20Sopenharmony_ci		ib->ptr[ib->length_dw++] = lower_32_bits(pe);
3308c2ecf20Sopenharmony_ci		ib->ptr[ib->length_dw++] = lower_32_bits(src);
3318c2ecf20Sopenharmony_ci		ib->ptr[ib->length_dw++] = upper_32_bits(pe) & 0xff;
3328c2ecf20Sopenharmony_ci		ib->ptr[ib->length_dw++] = upper_32_bits(src) & 0xff;
3338c2ecf20Sopenharmony_ci
3348c2ecf20Sopenharmony_ci		pe += ndw * 4;
3358c2ecf20Sopenharmony_ci		src += ndw * 4;
3368c2ecf20Sopenharmony_ci		count -= ndw / 2;
3378c2ecf20Sopenharmony_ci	}
3388c2ecf20Sopenharmony_ci}
3398c2ecf20Sopenharmony_ci
3408c2ecf20Sopenharmony_ci/**
3418c2ecf20Sopenharmony_ci * cayman_dma_vm_write_pages - update PTEs by writing them manually
3428c2ecf20Sopenharmony_ci *
3438c2ecf20Sopenharmony_ci * @rdev: radeon_device pointer
3448c2ecf20Sopenharmony_ci * @ib: indirect buffer to fill with commands
3458c2ecf20Sopenharmony_ci * @pe: addr of the page entry
3468c2ecf20Sopenharmony_ci * @addr: dst addr to write into pe
3478c2ecf20Sopenharmony_ci * @count: number of page entries to update
3488c2ecf20Sopenharmony_ci * @incr: increase next addr by incr bytes
3498c2ecf20Sopenharmony_ci * @flags: hw access flags
3508c2ecf20Sopenharmony_ci *
3518c2ecf20Sopenharmony_ci * Update PTEs by writing them manually using the DMA (cayman/TN).
3528c2ecf20Sopenharmony_ci */
3538c2ecf20Sopenharmony_civoid cayman_dma_vm_write_pages(struct radeon_device *rdev,
3548c2ecf20Sopenharmony_ci			       struct radeon_ib *ib,
3558c2ecf20Sopenharmony_ci			       uint64_t pe,
3568c2ecf20Sopenharmony_ci			       uint64_t addr, unsigned count,
3578c2ecf20Sopenharmony_ci			       uint32_t incr, uint32_t flags)
3588c2ecf20Sopenharmony_ci{
3598c2ecf20Sopenharmony_ci	uint64_t value;
3608c2ecf20Sopenharmony_ci	unsigned ndw;
3618c2ecf20Sopenharmony_ci
3628c2ecf20Sopenharmony_ci	while (count) {
3638c2ecf20Sopenharmony_ci		ndw = count * 2;
3648c2ecf20Sopenharmony_ci		if (ndw > 0xFFFFE)
3658c2ecf20Sopenharmony_ci			ndw = 0xFFFFE;
3668c2ecf20Sopenharmony_ci
3678c2ecf20Sopenharmony_ci		/* for non-physically contiguous pages (system) */
3688c2ecf20Sopenharmony_ci		ib->ptr[ib->length_dw++] = DMA_PACKET(DMA_PACKET_WRITE,
3698c2ecf20Sopenharmony_ci						      0, 0, ndw);
3708c2ecf20Sopenharmony_ci		ib->ptr[ib->length_dw++] = pe;
3718c2ecf20Sopenharmony_ci		ib->ptr[ib->length_dw++] = upper_32_bits(pe) & 0xff;
3728c2ecf20Sopenharmony_ci		for (; ndw > 0; ndw -= 2, --count, pe += 8) {
3738c2ecf20Sopenharmony_ci			if (flags & R600_PTE_SYSTEM) {
3748c2ecf20Sopenharmony_ci				value = radeon_vm_map_gart(rdev, addr);
3758c2ecf20Sopenharmony_ci			} else if (flags & R600_PTE_VALID) {
3768c2ecf20Sopenharmony_ci				value = addr;
3778c2ecf20Sopenharmony_ci			} else {
3788c2ecf20Sopenharmony_ci				value = 0;
3798c2ecf20Sopenharmony_ci			}
3808c2ecf20Sopenharmony_ci			addr += incr;
3818c2ecf20Sopenharmony_ci			value |= flags;
3828c2ecf20Sopenharmony_ci			ib->ptr[ib->length_dw++] = value;
3838c2ecf20Sopenharmony_ci			ib->ptr[ib->length_dw++] = upper_32_bits(value);
3848c2ecf20Sopenharmony_ci		}
3858c2ecf20Sopenharmony_ci	}
3868c2ecf20Sopenharmony_ci}
3878c2ecf20Sopenharmony_ci
3888c2ecf20Sopenharmony_ci/**
3898c2ecf20Sopenharmony_ci * cayman_dma_vm_set_pages - update the page tables using the DMA
3908c2ecf20Sopenharmony_ci *
3918c2ecf20Sopenharmony_ci * @rdev: radeon_device pointer
3928c2ecf20Sopenharmony_ci * @ib: indirect buffer to fill with commands
3938c2ecf20Sopenharmony_ci * @pe: addr of the page entry
3948c2ecf20Sopenharmony_ci * @addr: dst addr to write into pe
3958c2ecf20Sopenharmony_ci * @count: number of page entries to update
3968c2ecf20Sopenharmony_ci * @incr: increase next addr by incr bytes
3978c2ecf20Sopenharmony_ci * @flags: hw access flags
3988c2ecf20Sopenharmony_ci *
3998c2ecf20Sopenharmony_ci * Update the page tables using the DMA (cayman/TN).
4008c2ecf20Sopenharmony_ci */
4018c2ecf20Sopenharmony_civoid cayman_dma_vm_set_pages(struct radeon_device *rdev,
4028c2ecf20Sopenharmony_ci			     struct radeon_ib *ib,
4038c2ecf20Sopenharmony_ci			     uint64_t pe,
4048c2ecf20Sopenharmony_ci			     uint64_t addr, unsigned count,
4058c2ecf20Sopenharmony_ci			     uint32_t incr, uint32_t flags)
4068c2ecf20Sopenharmony_ci{
4078c2ecf20Sopenharmony_ci	uint64_t value;
4088c2ecf20Sopenharmony_ci	unsigned ndw;
4098c2ecf20Sopenharmony_ci
4108c2ecf20Sopenharmony_ci	while (count) {
4118c2ecf20Sopenharmony_ci		ndw = count * 2;
4128c2ecf20Sopenharmony_ci		if (ndw > 0xFFFFE)
4138c2ecf20Sopenharmony_ci			ndw = 0xFFFFE;
4148c2ecf20Sopenharmony_ci
4158c2ecf20Sopenharmony_ci		if (flags & R600_PTE_VALID)
4168c2ecf20Sopenharmony_ci			value = addr;
4178c2ecf20Sopenharmony_ci		else
4188c2ecf20Sopenharmony_ci			value = 0;
4198c2ecf20Sopenharmony_ci
4208c2ecf20Sopenharmony_ci		/* for physically contiguous pages (vram) */
4218c2ecf20Sopenharmony_ci		ib->ptr[ib->length_dw++] = DMA_PTE_PDE_PACKET(ndw);
4228c2ecf20Sopenharmony_ci		ib->ptr[ib->length_dw++] = pe; /* dst addr */
4238c2ecf20Sopenharmony_ci		ib->ptr[ib->length_dw++] = upper_32_bits(pe) & 0xff;
4248c2ecf20Sopenharmony_ci		ib->ptr[ib->length_dw++] = flags; /* mask */
4258c2ecf20Sopenharmony_ci		ib->ptr[ib->length_dw++] = 0;
4268c2ecf20Sopenharmony_ci		ib->ptr[ib->length_dw++] = value; /* value */
4278c2ecf20Sopenharmony_ci		ib->ptr[ib->length_dw++] = upper_32_bits(value);
4288c2ecf20Sopenharmony_ci		ib->ptr[ib->length_dw++] = incr; /* increment size */
4298c2ecf20Sopenharmony_ci		ib->ptr[ib->length_dw++] = 0;
4308c2ecf20Sopenharmony_ci
4318c2ecf20Sopenharmony_ci		pe += ndw * 4;
4328c2ecf20Sopenharmony_ci		addr += (ndw / 2) * incr;
4338c2ecf20Sopenharmony_ci		count -= ndw / 2;
4348c2ecf20Sopenharmony_ci	}
4358c2ecf20Sopenharmony_ci}
4368c2ecf20Sopenharmony_ci
4378c2ecf20Sopenharmony_ci/**
4388c2ecf20Sopenharmony_ci * cayman_dma_vm_pad_ib - pad the IB to the required number of dw
4398c2ecf20Sopenharmony_ci *
4408c2ecf20Sopenharmony_ci * @ib: indirect buffer to fill with padding
4418c2ecf20Sopenharmony_ci *
4428c2ecf20Sopenharmony_ci */
4438c2ecf20Sopenharmony_civoid cayman_dma_vm_pad_ib(struct radeon_ib *ib)
4448c2ecf20Sopenharmony_ci{
4458c2ecf20Sopenharmony_ci	while (ib->length_dw & 0x7)
4468c2ecf20Sopenharmony_ci		ib->ptr[ib->length_dw++] = DMA_PACKET(DMA_PACKET_NOP, 0, 0, 0);
4478c2ecf20Sopenharmony_ci}
4488c2ecf20Sopenharmony_ci
4498c2ecf20Sopenharmony_civoid cayman_dma_vm_flush(struct radeon_device *rdev, struct radeon_ring *ring,
4508c2ecf20Sopenharmony_ci			 unsigned vm_id, uint64_t pd_addr)
4518c2ecf20Sopenharmony_ci{
4528c2ecf20Sopenharmony_ci	radeon_ring_write(ring, DMA_PACKET(DMA_PACKET_SRBM_WRITE, 0, 0, 0));
4538c2ecf20Sopenharmony_ci	radeon_ring_write(ring, (0xf << 16) | ((VM_CONTEXT0_PAGE_TABLE_BASE_ADDR + (vm_id << 2)) >> 2));
4548c2ecf20Sopenharmony_ci	radeon_ring_write(ring, pd_addr >> 12);
4558c2ecf20Sopenharmony_ci
4568c2ecf20Sopenharmony_ci	/* flush hdp cache */
4578c2ecf20Sopenharmony_ci	radeon_ring_write(ring, DMA_PACKET(DMA_PACKET_SRBM_WRITE, 0, 0, 0));
4588c2ecf20Sopenharmony_ci	radeon_ring_write(ring, (0xf << 16) | (HDP_MEM_COHERENCY_FLUSH_CNTL >> 2));
4598c2ecf20Sopenharmony_ci	radeon_ring_write(ring, 1);
4608c2ecf20Sopenharmony_ci
4618c2ecf20Sopenharmony_ci	/* bits 0-7 are the VM contexts0-7 */
4628c2ecf20Sopenharmony_ci	radeon_ring_write(ring, DMA_PACKET(DMA_PACKET_SRBM_WRITE, 0, 0, 0));
4638c2ecf20Sopenharmony_ci	radeon_ring_write(ring, (0xf << 16) | (VM_INVALIDATE_REQUEST >> 2));
4648c2ecf20Sopenharmony_ci	radeon_ring_write(ring, 1 << vm_id);
4658c2ecf20Sopenharmony_ci
4668c2ecf20Sopenharmony_ci	/* wait for invalidate to complete */
4678c2ecf20Sopenharmony_ci	radeon_ring_write(ring, DMA_SRBM_READ_PACKET);
4688c2ecf20Sopenharmony_ci	radeon_ring_write(ring, (0xff << 20) | (VM_INVALIDATE_REQUEST >> 2));
4698c2ecf20Sopenharmony_ci	radeon_ring_write(ring, 0); /* mask */
4708c2ecf20Sopenharmony_ci	radeon_ring_write(ring, 0); /* value */
4718c2ecf20Sopenharmony_ci}
4728c2ecf20Sopenharmony_ci
473