/*
 * Copyright 2010 Advanced Micro Devices, Inc.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
 * OTHER DEALINGS IN THE SOFTWARE.
 *
 * Authors: Alex Deucher
 */
2462306a36Sopenharmony_ci
2562306a36Sopenharmony_ci#include "radeon.h"
2662306a36Sopenharmony_ci#include "radeon_asic.h"
2762306a36Sopenharmony_ci#include "radeon_trace.h"
2862306a36Sopenharmony_ci#include "ni.h"
2962306a36Sopenharmony_ci#include "nid.h"
3062306a36Sopenharmony_ci
/*
 * DMA
 * Starting with R600, the GPU has an asynchronous
 * DMA engine.  The programming model is very similar
 * to the 3D engine (ring buffer, IBs, etc.), but the
 * DMA controller has its own packet format that is
 * different from the PM4 format used by the 3D engine.
 * It supports copying data, writing embedded data,
 * solid fills, and a number of other things.  It also
 * has support for tiling/detiling of buffers.
 * Cayman and newer support two asynchronous DMA engines.
 */
4362306a36Sopenharmony_ci
4462306a36Sopenharmony_ci/**
4562306a36Sopenharmony_ci * cayman_dma_get_rptr - get the current read pointer
4662306a36Sopenharmony_ci *
4762306a36Sopenharmony_ci * @rdev: radeon_device pointer
4862306a36Sopenharmony_ci * @ring: radeon ring pointer
4962306a36Sopenharmony_ci *
5062306a36Sopenharmony_ci * Get the current rptr from the hardware (cayman+).
5162306a36Sopenharmony_ci */
5262306a36Sopenharmony_ciuint32_t cayman_dma_get_rptr(struct radeon_device *rdev,
5362306a36Sopenharmony_ci			     struct radeon_ring *ring)
5462306a36Sopenharmony_ci{
5562306a36Sopenharmony_ci	u32 rptr, reg;
5662306a36Sopenharmony_ci
5762306a36Sopenharmony_ci	if (rdev->wb.enabled) {
5862306a36Sopenharmony_ci		rptr = rdev->wb.wb[ring->rptr_offs/4];
5962306a36Sopenharmony_ci	} else {
6062306a36Sopenharmony_ci		if (ring->idx == R600_RING_TYPE_DMA_INDEX)
6162306a36Sopenharmony_ci			reg = DMA_RB_RPTR + DMA0_REGISTER_OFFSET;
6262306a36Sopenharmony_ci		else
6362306a36Sopenharmony_ci			reg = DMA_RB_RPTR + DMA1_REGISTER_OFFSET;
6462306a36Sopenharmony_ci
6562306a36Sopenharmony_ci		rptr = RREG32(reg);
6662306a36Sopenharmony_ci	}
6762306a36Sopenharmony_ci
6862306a36Sopenharmony_ci	return (rptr & 0x3fffc) >> 2;
6962306a36Sopenharmony_ci}
7062306a36Sopenharmony_ci
7162306a36Sopenharmony_ci/**
7262306a36Sopenharmony_ci * cayman_dma_get_wptr - get the current write pointer
7362306a36Sopenharmony_ci *
7462306a36Sopenharmony_ci * @rdev: radeon_device pointer
7562306a36Sopenharmony_ci * @ring: radeon ring pointer
7662306a36Sopenharmony_ci *
7762306a36Sopenharmony_ci * Get the current wptr from the hardware (cayman+).
7862306a36Sopenharmony_ci */
7962306a36Sopenharmony_ciuint32_t cayman_dma_get_wptr(struct radeon_device *rdev,
8062306a36Sopenharmony_ci			   struct radeon_ring *ring)
8162306a36Sopenharmony_ci{
8262306a36Sopenharmony_ci	u32 reg;
8362306a36Sopenharmony_ci
8462306a36Sopenharmony_ci	if (ring->idx == R600_RING_TYPE_DMA_INDEX)
8562306a36Sopenharmony_ci		reg = DMA_RB_WPTR + DMA0_REGISTER_OFFSET;
8662306a36Sopenharmony_ci	else
8762306a36Sopenharmony_ci		reg = DMA_RB_WPTR + DMA1_REGISTER_OFFSET;
8862306a36Sopenharmony_ci
8962306a36Sopenharmony_ci	return (RREG32(reg) & 0x3fffc) >> 2;
9062306a36Sopenharmony_ci}
9162306a36Sopenharmony_ci
9262306a36Sopenharmony_ci/**
9362306a36Sopenharmony_ci * cayman_dma_set_wptr - commit the write pointer
9462306a36Sopenharmony_ci *
9562306a36Sopenharmony_ci * @rdev: radeon_device pointer
9662306a36Sopenharmony_ci * @ring: radeon ring pointer
9762306a36Sopenharmony_ci *
9862306a36Sopenharmony_ci * Write the wptr back to the hardware (cayman+).
9962306a36Sopenharmony_ci */
10062306a36Sopenharmony_civoid cayman_dma_set_wptr(struct radeon_device *rdev,
10162306a36Sopenharmony_ci			 struct radeon_ring *ring)
10262306a36Sopenharmony_ci{
10362306a36Sopenharmony_ci	u32 reg;
10462306a36Sopenharmony_ci
10562306a36Sopenharmony_ci	if (ring->idx == R600_RING_TYPE_DMA_INDEX)
10662306a36Sopenharmony_ci		reg = DMA_RB_WPTR + DMA0_REGISTER_OFFSET;
10762306a36Sopenharmony_ci	else
10862306a36Sopenharmony_ci		reg = DMA_RB_WPTR + DMA1_REGISTER_OFFSET;
10962306a36Sopenharmony_ci
11062306a36Sopenharmony_ci	WREG32(reg, (ring->wptr << 2) & 0x3fffc);
11162306a36Sopenharmony_ci}
11262306a36Sopenharmony_ci
11362306a36Sopenharmony_ci/**
11462306a36Sopenharmony_ci * cayman_dma_ring_ib_execute - Schedule an IB on the DMA engine
11562306a36Sopenharmony_ci *
11662306a36Sopenharmony_ci * @rdev: radeon_device pointer
11762306a36Sopenharmony_ci * @ib: IB object to schedule
11862306a36Sopenharmony_ci *
11962306a36Sopenharmony_ci * Schedule an IB in the DMA ring (cayman-SI).
12062306a36Sopenharmony_ci */
12162306a36Sopenharmony_civoid cayman_dma_ring_ib_execute(struct radeon_device *rdev,
12262306a36Sopenharmony_ci				struct radeon_ib *ib)
12362306a36Sopenharmony_ci{
12462306a36Sopenharmony_ci	struct radeon_ring *ring = &rdev->ring[ib->ring];
12562306a36Sopenharmony_ci	unsigned vm_id = ib->vm ? ib->vm->ids[ib->ring].id : 0;
12662306a36Sopenharmony_ci
12762306a36Sopenharmony_ci	if (rdev->wb.enabled) {
12862306a36Sopenharmony_ci		u32 next_rptr = ring->wptr + 4;
12962306a36Sopenharmony_ci		while ((next_rptr & 7) != 5)
13062306a36Sopenharmony_ci			next_rptr++;
13162306a36Sopenharmony_ci		next_rptr += 3;
13262306a36Sopenharmony_ci		radeon_ring_write(ring, DMA_PACKET(DMA_PACKET_WRITE, 0, 0, 1));
13362306a36Sopenharmony_ci		radeon_ring_write(ring, ring->next_rptr_gpu_addr & 0xfffffffc);
13462306a36Sopenharmony_ci		radeon_ring_write(ring, upper_32_bits(ring->next_rptr_gpu_addr) & 0xff);
13562306a36Sopenharmony_ci		radeon_ring_write(ring, next_rptr);
13662306a36Sopenharmony_ci	}
13762306a36Sopenharmony_ci
13862306a36Sopenharmony_ci	/* The indirect buffer packet must end on an 8 DW boundary in the DMA ring.
13962306a36Sopenharmony_ci	 * Pad as necessary with NOPs.
14062306a36Sopenharmony_ci	 */
14162306a36Sopenharmony_ci	while ((ring->wptr & 7) != 5)
14262306a36Sopenharmony_ci		radeon_ring_write(ring, DMA_PACKET(DMA_PACKET_NOP, 0, 0, 0));
14362306a36Sopenharmony_ci	radeon_ring_write(ring, DMA_IB_PACKET(DMA_PACKET_INDIRECT_BUFFER, vm_id, 0));
14462306a36Sopenharmony_ci	radeon_ring_write(ring, (ib->gpu_addr & 0xFFFFFFE0));
14562306a36Sopenharmony_ci	radeon_ring_write(ring, (ib->length_dw << 12) | (upper_32_bits(ib->gpu_addr) & 0xFF));
14662306a36Sopenharmony_ci
14762306a36Sopenharmony_ci}
14862306a36Sopenharmony_ci
14962306a36Sopenharmony_ci/**
15062306a36Sopenharmony_ci * cayman_dma_stop - stop the async dma engines
15162306a36Sopenharmony_ci *
15262306a36Sopenharmony_ci * @rdev: radeon_device pointer
15362306a36Sopenharmony_ci *
15462306a36Sopenharmony_ci * Stop the async dma engines (cayman-SI).
15562306a36Sopenharmony_ci */
15662306a36Sopenharmony_civoid cayman_dma_stop(struct radeon_device *rdev)
15762306a36Sopenharmony_ci{
15862306a36Sopenharmony_ci	u32 rb_cntl;
15962306a36Sopenharmony_ci
16062306a36Sopenharmony_ci	if ((rdev->asic->copy.copy_ring_index == R600_RING_TYPE_DMA_INDEX) ||
16162306a36Sopenharmony_ci	    (rdev->asic->copy.copy_ring_index == CAYMAN_RING_TYPE_DMA1_INDEX))
16262306a36Sopenharmony_ci		radeon_ttm_set_active_vram_size(rdev, rdev->mc.visible_vram_size);
16362306a36Sopenharmony_ci
16462306a36Sopenharmony_ci	/* dma0 */
16562306a36Sopenharmony_ci	rb_cntl = RREG32(DMA_RB_CNTL + DMA0_REGISTER_OFFSET);
16662306a36Sopenharmony_ci	rb_cntl &= ~DMA_RB_ENABLE;
16762306a36Sopenharmony_ci	WREG32(DMA_RB_CNTL + DMA0_REGISTER_OFFSET, rb_cntl);
16862306a36Sopenharmony_ci
16962306a36Sopenharmony_ci	/* dma1 */
17062306a36Sopenharmony_ci	rb_cntl = RREG32(DMA_RB_CNTL + DMA1_REGISTER_OFFSET);
17162306a36Sopenharmony_ci	rb_cntl &= ~DMA_RB_ENABLE;
17262306a36Sopenharmony_ci	WREG32(DMA_RB_CNTL + DMA1_REGISTER_OFFSET, rb_cntl);
17362306a36Sopenharmony_ci
17462306a36Sopenharmony_ci	rdev->ring[R600_RING_TYPE_DMA_INDEX].ready = false;
17562306a36Sopenharmony_ci	rdev->ring[CAYMAN_RING_TYPE_DMA1_INDEX].ready = false;
17662306a36Sopenharmony_ci}
17762306a36Sopenharmony_ci
17862306a36Sopenharmony_ci/**
17962306a36Sopenharmony_ci * cayman_dma_resume - setup and start the async dma engines
18062306a36Sopenharmony_ci *
18162306a36Sopenharmony_ci * @rdev: radeon_device pointer
18262306a36Sopenharmony_ci *
18362306a36Sopenharmony_ci * Set up the DMA ring buffers and enable them. (cayman-SI).
18462306a36Sopenharmony_ci * Returns 0 for success, error for failure.
18562306a36Sopenharmony_ci */
18662306a36Sopenharmony_ciint cayman_dma_resume(struct radeon_device *rdev)
18762306a36Sopenharmony_ci{
18862306a36Sopenharmony_ci	struct radeon_ring *ring;
18962306a36Sopenharmony_ci	u32 rb_cntl, dma_cntl, ib_cntl;
19062306a36Sopenharmony_ci	u32 rb_bufsz;
19162306a36Sopenharmony_ci	u32 reg_offset, wb_offset;
19262306a36Sopenharmony_ci	int i, r;
19362306a36Sopenharmony_ci
19462306a36Sopenharmony_ci	for (i = 0; i < 2; i++) {
19562306a36Sopenharmony_ci		if (i == 0) {
19662306a36Sopenharmony_ci			ring = &rdev->ring[R600_RING_TYPE_DMA_INDEX];
19762306a36Sopenharmony_ci			reg_offset = DMA0_REGISTER_OFFSET;
19862306a36Sopenharmony_ci			wb_offset = R600_WB_DMA_RPTR_OFFSET;
19962306a36Sopenharmony_ci		} else {
20062306a36Sopenharmony_ci			ring = &rdev->ring[CAYMAN_RING_TYPE_DMA1_INDEX];
20162306a36Sopenharmony_ci			reg_offset = DMA1_REGISTER_OFFSET;
20262306a36Sopenharmony_ci			wb_offset = CAYMAN_WB_DMA1_RPTR_OFFSET;
20362306a36Sopenharmony_ci		}
20462306a36Sopenharmony_ci
20562306a36Sopenharmony_ci		WREG32(DMA_SEM_INCOMPLETE_TIMER_CNTL + reg_offset, 0);
20662306a36Sopenharmony_ci		WREG32(DMA_SEM_WAIT_FAIL_TIMER_CNTL + reg_offset, 0);
20762306a36Sopenharmony_ci
20862306a36Sopenharmony_ci		/* Set ring buffer size in dwords */
20962306a36Sopenharmony_ci		rb_bufsz = order_base_2(ring->ring_size / 4);
21062306a36Sopenharmony_ci		rb_cntl = rb_bufsz << 1;
21162306a36Sopenharmony_ci#ifdef __BIG_ENDIAN
21262306a36Sopenharmony_ci		rb_cntl |= DMA_RB_SWAP_ENABLE | DMA_RPTR_WRITEBACK_SWAP_ENABLE;
21362306a36Sopenharmony_ci#endif
21462306a36Sopenharmony_ci		WREG32(DMA_RB_CNTL + reg_offset, rb_cntl);
21562306a36Sopenharmony_ci
21662306a36Sopenharmony_ci		/* Initialize the ring buffer's read and write pointers */
21762306a36Sopenharmony_ci		WREG32(DMA_RB_RPTR + reg_offset, 0);
21862306a36Sopenharmony_ci		WREG32(DMA_RB_WPTR + reg_offset, 0);
21962306a36Sopenharmony_ci
22062306a36Sopenharmony_ci		/* set the wb address whether it's enabled or not */
22162306a36Sopenharmony_ci		WREG32(DMA_RB_RPTR_ADDR_HI + reg_offset,
22262306a36Sopenharmony_ci		       upper_32_bits(rdev->wb.gpu_addr + wb_offset) & 0xFF);
22362306a36Sopenharmony_ci		WREG32(DMA_RB_RPTR_ADDR_LO + reg_offset,
22462306a36Sopenharmony_ci		       ((rdev->wb.gpu_addr + wb_offset) & 0xFFFFFFFC));
22562306a36Sopenharmony_ci
22662306a36Sopenharmony_ci		if (rdev->wb.enabled)
22762306a36Sopenharmony_ci			rb_cntl |= DMA_RPTR_WRITEBACK_ENABLE;
22862306a36Sopenharmony_ci
22962306a36Sopenharmony_ci		WREG32(DMA_RB_BASE + reg_offset, ring->gpu_addr >> 8);
23062306a36Sopenharmony_ci
23162306a36Sopenharmony_ci		/* enable DMA IBs */
23262306a36Sopenharmony_ci		ib_cntl = DMA_IB_ENABLE | CMD_VMID_FORCE;
23362306a36Sopenharmony_ci#ifdef __BIG_ENDIAN
23462306a36Sopenharmony_ci		ib_cntl |= DMA_IB_SWAP_ENABLE;
23562306a36Sopenharmony_ci#endif
23662306a36Sopenharmony_ci		WREG32(DMA_IB_CNTL + reg_offset, ib_cntl);
23762306a36Sopenharmony_ci
23862306a36Sopenharmony_ci		dma_cntl = RREG32(DMA_CNTL + reg_offset);
23962306a36Sopenharmony_ci		dma_cntl &= ~CTXEMPTY_INT_ENABLE;
24062306a36Sopenharmony_ci		WREG32(DMA_CNTL + reg_offset, dma_cntl);
24162306a36Sopenharmony_ci
24262306a36Sopenharmony_ci		ring->wptr = 0;
24362306a36Sopenharmony_ci		WREG32(DMA_RB_WPTR + reg_offset, ring->wptr << 2);
24462306a36Sopenharmony_ci
24562306a36Sopenharmony_ci		WREG32(DMA_RB_CNTL + reg_offset, rb_cntl | DMA_RB_ENABLE);
24662306a36Sopenharmony_ci
24762306a36Sopenharmony_ci		ring->ready = true;
24862306a36Sopenharmony_ci
24962306a36Sopenharmony_ci		r = radeon_ring_test(rdev, ring->idx, ring);
25062306a36Sopenharmony_ci		if (r) {
25162306a36Sopenharmony_ci			ring->ready = false;
25262306a36Sopenharmony_ci			return r;
25362306a36Sopenharmony_ci		}
25462306a36Sopenharmony_ci	}
25562306a36Sopenharmony_ci
25662306a36Sopenharmony_ci	if ((rdev->asic->copy.copy_ring_index == R600_RING_TYPE_DMA_INDEX) ||
25762306a36Sopenharmony_ci	    (rdev->asic->copy.copy_ring_index == CAYMAN_RING_TYPE_DMA1_INDEX))
25862306a36Sopenharmony_ci		radeon_ttm_set_active_vram_size(rdev, rdev->mc.real_vram_size);
25962306a36Sopenharmony_ci
26062306a36Sopenharmony_ci	return 0;
26162306a36Sopenharmony_ci}
26262306a36Sopenharmony_ci
26362306a36Sopenharmony_ci/**
26462306a36Sopenharmony_ci * cayman_dma_fini - tear down the async dma engines
26562306a36Sopenharmony_ci *
26662306a36Sopenharmony_ci * @rdev: radeon_device pointer
26762306a36Sopenharmony_ci *
26862306a36Sopenharmony_ci * Stop the async dma engines and free the rings (cayman-SI).
26962306a36Sopenharmony_ci */
27062306a36Sopenharmony_civoid cayman_dma_fini(struct radeon_device *rdev)
27162306a36Sopenharmony_ci{
27262306a36Sopenharmony_ci	cayman_dma_stop(rdev);
27362306a36Sopenharmony_ci	radeon_ring_fini(rdev, &rdev->ring[R600_RING_TYPE_DMA_INDEX]);
27462306a36Sopenharmony_ci	radeon_ring_fini(rdev, &rdev->ring[CAYMAN_RING_TYPE_DMA1_INDEX]);
27562306a36Sopenharmony_ci}
27662306a36Sopenharmony_ci
27762306a36Sopenharmony_ci/**
27862306a36Sopenharmony_ci * cayman_dma_is_lockup - Check if the DMA engine is locked up
27962306a36Sopenharmony_ci *
28062306a36Sopenharmony_ci * @rdev: radeon_device pointer
28162306a36Sopenharmony_ci * @ring: radeon_ring structure holding ring information
28262306a36Sopenharmony_ci *
28362306a36Sopenharmony_ci * Check if the async DMA engine is locked up.
28462306a36Sopenharmony_ci * Returns true if the engine appears to be locked up, false if not.
28562306a36Sopenharmony_ci */
28662306a36Sopenharmony_cibool cayman_dma_is_lockup(struct radeon_device *rdev, struct radeon_ring *ring)
28762306a36Sopenharmony_ci{
28862306a36Sopenharmony_ci	u32 reset_mask = cayman_gpu_check_soft_reset(rdev);
28962306a36Sopenharmony_ci	u32 mask;
29062306a36Sopenharmony_ci
29162306a36Sopenharmony_ci	if (ring->idx == R600_RING_TYPE_DMA_INDEX)
29262306a36Sopenharmony_ci		mask = RADEON_RESET_DMA;
29362306a36Sopenharmony_ci	else
29462306a36Sopenharmony_ci		mask = RADEON_RESET_DMA1;
29562306a36Sopenharmony_ci
29662306a36Sopenharmony_ci	if (!(reset_mask & mask)) {
29762306a36Sopenharmony_ci		radeon_ring_lockup_update(rdev, ring);
29862306a36Sopenharmony_ci		return false;
29962306a36Sopenharmony_ci	}
30062306a36Sopenharmony_ci	return radeon_ring_test_lockup(rdev, ring);
30162306a36Sopenharmony_ci}
30262306a36Sopenharmony_ci
30362306a36Sopenharmony_ci/**
30462306a36Sopenharmony_ci * cayman_dma_vm_copy_pages - update PTEs by copying them from the GART
30562306a36Sopenharmony_ci *
30662306a36Sopenharmony_ci * @rdev: radeon_device pointer
30762306a36Sopenharmony_ci * @ib: indirect buffer to fill with commands
30862306a36Sopenharmony_ci * @pe: addr of the page entry
30962306a36Sopenharmony_ci * @src: src addr where to copy from
31062306a36Sopenharmony_ci * @count: number of page entries to update
31162306a36Sopenharmony_ci *
31262306a36Sopenharmony_ci * Update PTEs by copying them from the GART using the DMA (cayman/TN).
31362306a36Sopenharmony_ci */
31462306a36Sopenharmony_civoid cayman_dma_vm_copy_pages(struct radeon_device *rdev,
31562306a36Sopenharmony_ci			      struct radeon_ib *ib,
31662306a36Sopenharmony_ci			      uint64_t pe, uint64_t src,
31762306a36Sopenharmony_ci			      unsigned count)
31862306a36Sopenharmony_ci{
31962306a36Sopenharmony_ci	unsigned ndw;
32062306a36Sopenharmony_ci
32162306a36Sopenharmony_ci	while (count) {
32262306a36Sopenharmony_ci		ndw = count * 2;
32362306a36Sopenharmony_ci		if (ndw > 0xFFFFE)
32462306a36Sopenharmony_ci			ndw = 0xFFFFE;
32562306a36Sopenharmony_ci
32662306a36Sopenharmony_ci		ib->ptr[ib->length_dw++] = DMA_PACKET(DMA_PACKET_COPY,
32762306a36Sopenharmony_ci						      0, 0, ndw);
32862306a36Sopenharmony_ci		ib->ptr[ib->length_dw++] = lower_32_bits(pe);
32962306a36Sopenharmony_ci		ib->ptr[ib->length_dw++] = lower_32_bits(src);
33062306a36Sopenharmony_ci		ib->ptr[ib->length_dw++] = upper_32_bits(pe) & 0xff;
33162306a36Sopenharmony_ci		ib->ptr[ib->length_dw++] = upper_32_bits(src) & 0xff;
33262306a36Sopenharmony_ci
33362306a36Sopenharmony_ci		pe += ndw * 4;
33462306a36Sopenharmony_ci		src += ndw * 4;
33562306a36Sopenharmony_ci		count -= ndw / 2;
33662306a36Sopenharmony_ci	}
33762306a36Sopenharmony_ci}
33862306a36Sopenharmony_ci
33962306a36Sopenharmony_ci/**
34062306a36Sopenharmony_ci * cayman_dma_vm_write_pages - update PTEs by writing them manually
34162306a36Sopenharmony_ci *
34262306a36Sopenharmony_ci * @rdev: radeon_device pointer
34362306a36Sopenharmony_ci * @ib: indirect buffer to fill with commands
34462306a36Sopenharmony_ci * @pe: addr of the page entry
34562306a36Sopenharmony_ci * @addr: dst addr to write into pe
34662306a36Sopenharmony_ci * @count: number of page entries to update
34762306a36Sopenharmony_ci * @incr: increase next addr by incr bytes
34862306a36Sopenharmony_ci * @flags: hw access flags
34962306a36Sopenharmony_ci *
35062306a36Sopenharmony_ci * Update PTEs by writing them manually using the DMA (cayman/TN).
35162306a36Sopenharmony_ci */
35262306a36Sopenharmony_civoid cayman_dma_vm_write_pages(struct radeon_device *rdev,
35362306a36Sopenharmony_ci			       struct radeon_ib *ib,
35462306a36Sopenharmony_ci			       uint64_t pe,
35562306a36Sopenharmony_ci			       uint64_t addr, unsigned count,
35662306a36Sopenharmony_ci			       uint32_t incr, uint32_t flags)
35762306a36Sopenharmony_ci{
35862306a36Sopenharmony_ci	uint64_t value;
35962306a36Sopenharmony_ci	unsigned ndw;
36062306a36Sopenharmony_ci
36162306a36Sopenharmony_ci	while (count) {
36262306a36Sopenharmony_ci		ndw = count * 2;
36362306a36Sopenharmony_ci		if (ndw > 0xFFFFE)
36462306a36Sopenharmony_ci			ndw = 0xFFFFE;
36562306a36Sopenharmony_ci
36662306a36Sopenharmony_ci		/* for non-physically contiguous pages (system) */
36762306a36Sopenharmony_ci		ib->ptr[ib->length_dw++] = DMA_PACKET(DMA_PACKET_WRITE,
36862306a36Sopenharmony_ci						      0, 0, ndw);
36962306a36Sopenharmony_ci		ib->ptr[ib->length_dw++] = pe;
37062306a36Sopenharmony_ci		ib->ptr[ib->length_dw++] = upper_32_bits(pe) & 0xff;
37162306a36Sopenharmony_ci		for (; ndw > 0; ndw -= 2, --count, pe += 8) {
37262306a36Sopenharmony_ci			if (flags & R600_PTE_SYSTEM) {
37362306a36Sopenharmony_ci				value = radeon_vm_map_gart(rdev, addr);
37462306a36Sopenharmony_ci			} else if (flags & R600_PTE_VALID) {
37562306a36Sopenharmony_ci				value = addr;
37662306a36Sopenharmony_ci			} else {
37762306a36Sopenharmony_ci				value = 0;
37862306a36Sopenharmony_ci			}
37962306a36Sopenharmony_ci			addr += incr;
38062306a36Sopenharmony_ci			value |= flags;
38162306a36Sopenharmony_ci			ib->ptr[ib->length_dw++] = value;
38262306a36Sopenharmony_ci			ib->ptr[ib->length_dw++] = upper_32_bits(value);
38362306a36Sopenharmony_ci		}
38462306a36Sopenharmony_ci	}
38562306a36Sopenharmony_ci}
38662306a36Sopenharmony_ci
38762306a36Sopenharmony_ci/**
38862306a36Sopenharmony_ci * cayman_dma_vm_set_pages - update the page tables using the DMA
38962306a36Sopenharmony_ci *
39062306a36Sopenharmony_ci * @rdev: radeon_device pointer
39162306a36Sopenharmony_ci * @ib: indirect buffer to fill with commands
39262306a36Sopenharmony_ci * @pe: addr of the page entry
39362306a36Sopenharmony_ci * @addr: dst addr to write into pe
39462306a36Sopenharmony_ci * @count: number of page entries to update
39562306a36Sopenharmony_ci * @incr: increase next addr by incr bytes
39662306a36Sopenharmony_ci * @flags: hw access flags
39762306a36Sopenharmony_ci *
39862306a36Sopenharmony_ci * Update the page tables using the DMA (cayman/TN).
39962306a36Sopenharmony_ci */
40062306a36Sopenharmony_civoid cayman_dma_vm_set_pages(struct radeon_device *rdev,
40162306a36Sopenharmony_ci			     struct radeon_ib *ib,
40262306a36Sopenharmony_ci			     uint64_t pe,
40362306a36Sopenharmony_ci			     uint64_t addr, unsigned count,
40462306a36Sopenharmony_ci			     uint32_t incr, uint32_t flags)
40562306a36Sopenharmony_ci{
40662306a36Sopenharmony_ci	uint64_t value;
40762306a36Sopenharmony_ci	unsigned ndw;
40862306a36Sopenharmony_ci
40962306a36Sopenharmony_ci	while (count) {
41062306a36Sopenharmony_ci		ndw = count * 2;
41162306a36Sopenharmony_ci		if (ndw > 0xFFFFE)
41262306a36Sopenharmony_ci			ndw = 0xFFFFE;
41362306a36Sopenharmony_ci
41462306a36Sopenharmony_ci		if (flags & R600_PTE_VALID)
41562306a36Sopenharmony_ci			value = addr;
41662306a36Sopenharmony_ci		else
41762306a36Sopenharmony_ci			value = 0;
41862306a36Sopenharmony_ci
41962306a36Sopenharmony_ci		/* for physically contiguous pages (vram) */
42062306a36Sopenharmony_ci		ib->ptr[ib->length_dw++] = DMA_PTE_PDE_PACKET(ndw);
42162306a36Sopenharmony_ci		ib->ptr[ib->length_dw++] = pe; /* dst addr */
42262306a36Sopenharmony_ci		ib->ptr[ib->length_dw++] = upper_32_bits(pe) & 0xff;
42362306a36Sopenharmony_ci		ib->ptr[ib->length_dw++] = flags; /* mask */
42462306a36Sopenharmony_ci		ib->ptr[ib->length_dw++] = 0;
42562306a36Sopenharmony_ci		ib->ptr[ib->length_dw++] = value; /* value */
42662306a36Sopenharmony_ci		ib->ptr[ib->length_dw++] = upper_32_bits(value);
42762306a36Sopenharmony_ci		ib->ptr[ib->length_dw++] = incr; /* increment size */
42862306a36Sopenharmony_ci		ib->ptr[ib->length_dw++] = 0;
42962306a36Sopenharmony_ci
43062306a36Sopenharmony_ci		pe += ndw * 4;
43162306a36Sopenharmony_ci		addr += (ndw / 2) * incr;
43262306a36Sopenharmony_ci		count -= ndw / 2;
43362306a36Sopenharmony_ci	}
43462306a36Sopenharmony_ci}
43562306a36Sopenharmony_ci
43662306a36Sopenharmony_ci/**
43762306a36Sopenharmony_ci * cayman_dma_vm_pad_ib - pad the IB to the required number of dw
43862306a36Sopenharmony_ci *
43962306a36Sopenharmony_ci * @ib: indirect buffer to fill with padding
44062306a36Sopenharmony_ci *
44162306a36Sopenharmony_ci */
44262306a36Sopenharmony_civoid cayman_dma_vm_pad_ib(struct radeon_ib *ib)
44362306a36Sopenharmony_ci{
44462306a36Sopenharmony_ci	while (ib->length_dw & 0x7)
44562306a36Sopenharmony_ci		ib->ptr[ib->length_dw++] = DMA_PACKET(DMA_PACKET_NOP, 0, 0, 0);
44662306a36Sopenharmony_ci}
44762306a36Sopenharmony_ci
44862306a36Sopenharmony_civoid cayman_dma_vm_flush(struct radeon_device *rdev, struct radeon_ring *ring,
44962306a36Sopenharmony_ci			 unsigned vm_id, uint64_t pd_addr)
45062306a36Sopenharmony_ci{
45162306a36Sopenharmony_ci	radeon_ring_write(ring, DMA_PACKET(DMA_PACKET_SRBM_WRITE, 0, 0, 0));
45262306a36Sopenharmony_ci	radeon_ring_write(ring, (0xf << 16) | ((VM_CONTEXT0_PAGE_TABLE_BASE_ADDR + (vm_id << 2)) >> 2));
45362306a36Sopenharmony_ci	radeon_ring_write(ring, pd_addr >> 12);
45462306a36Sopenharmony_ci
45562306a36Sopenharmony_ci	/* flush hdp cache */
45662306a36Sopenharmony_ci	radeon_ring_write(ring, DMA_PACKET(DMA_PACKET_SRBM_WRITE, 0, 0, 0));
45762306a36Sopenharmony_ci	radeon_ring_write(ring, (0xf << 16) | (HDP_MEM_COHERENCY_FLUSH_CNTL >> 2));
45862306a36Sopenharmony_ci	radeon_ring_write(ring, 1);
45962306a36Sopenharmony_ci
46062306a36Sopenharmony_ci	/* bits 0-7 are the VM contexts0-7 */
46162306a36Sopenharmony_ci	radeon_ring_write(ring, DMA_PACKET(DMA_PACKET_SRBM_WRITE, 0, 0, 0));
46262306a36Sopenharmony_ci	radeon_ring_write(ring, (0xf << 16) | (VM_INVALIDATE_REQUEST >> 2));
46362306a36Sopenharmony_ci	radeon_ring_write(ring, 1 << vm_id);
46462306a36Sopenharmony_ci
46562306a36Sopenharmony_ci	/* wait for invalidate to complete */
46662306a36Sopenharmony_ci	radeon_ring_write(ring, DMA_SRBM_READ_PACKET);
46762306a36Sopenharmony_ci	radeon_ring_write(ring, (0xff << 20) | (VM_INVALIDATE_REQUEST >> 2));
46862306a36Sopenharmony_ci	radeon_ring_write(ring, 0); /* mask */
46962306a36Sopenharmony_ci	radeon_ring_write(ring, 0); /* value */
47062306a36Sopenharmony_ci}
47162306a36Sopenharmony_ci
472