/*
 * Copyright 2013 Advanced Micro Devices, Inc.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
 * OTHER DEALINGS IN THE SOFTWARE.
 *
 * Authors: Alex Deucher
 */
2462306a36Sopenharmony_ci
2562306a36Sopenharmony_ci#include "radeon.h"
2662306a36Sopenharmony_ci#include "radeon_asic.h"
2762306a36Sopenharmony_ci#include "radeon_trace.h"
2862306a36Sopenharmony_ci#include "si.h"
2962306a36Sopenharmony_ci#include "sid.h"
3062306a36Sopenharmony_ci
3162306a36Sopenharmony_ci/**
3262306a36Sopenharmony_ci * si_dma_is_lockup - Check if the DMA engine is locked up
3362306a36Sopenharmony_ci *
3462306a36Sopenharmony_ci * @rdev: radeon_device pointer
3562306a36Sopenharmony_ci * @ring: radeon_ring structure holding ring information
3662306a36Sopenharmony_ci *
3762306a36Sopenharmony_ci * Check if the async DMA engine is locked up.
3862306a36Sopenharmony_ci * Returns true if the engine appears to be locked up, false if not.
3962306a36Sopenharmony_ci */
4062306a36Sopenharmony_cibool si_dma_is_lockup(struct radeon_device *rdev, struct radeon_ring *ring)
4162306a36Sopenharmony_ci{
4262306a36Sopenharmony_ci	u32 reset_mask = si_gpu_check_soft_reset(rdev);
4362306a36Sopenharmony_ci	u32 mask;
4462306a36Sopenharmony_ci
4562306a36Sopenharmony_ci	if (ring->idx == R600_RING_TYPE_DMA_INDEX)
4662306a36Sopenharmony_ci		mask = RADEON_RESET_DMA;
4762306a36Sopenharmony_ci	else
4862306a36Sopenharmony_ci		mask = RADEON_RESET_DMA1;
4962306a36Sopenharmony_ci
5062306a36Sopenharmony_ci	if (!(reset_mask & mask)) {
5162306a36Sopenharmony_ci		radeon_ring_lockup_update(rdev, ring);
5262306a36Sopenharmony_ci		return false;
5362306a36Sopenharmony_ci	}
5462306a36Sopenharmony_ci	return radeon_ring_test_lockup(rdev, ring);
5562306a36Sopenharmony_ci}
5662306a36Sopenharmony_ci
5762306a36Sopenharmony_ci/**
5862306a36Sopenharmony_ci * si_dma_vm_copy_pages - update PTEs by copying them from the GART
5962306a36Sopenharmony_ci *
6062306a36Sopenharmony_ci * @rdev: radeon_device pointer
6162306a36Sopenharmony_ci * @ib: indirect buffer to fill with commands
6262306a36Sopenharmony_ci * @pe: addr of the page entry
6362306a36Sopenharmony_ci * @src: src addr where to copy from
6462306a36Sopenharmony_ci * @count: number of page entries to update
6562306a36Sopenharmony_ci *
6662306a36Sopenharmony_ci * Update PTEs by copying them from the GART using the DMA (SI).
6762306a36Sopenharmony_ci */
6862306a36Sopenharmony_civoid si_dma_vm_copy_pages(struct radeon_device *rdev,
6962306a36Sopenharmony_ci			  struct radeon_ib *ib,
7062306a36Sopenharmony_ci			  uint64_t pe, uint64_t src,
7162306a36Sopenharmony_ci			  unsigned count)
7262306a36Sopenharmony_ci{
7362306a36Sopenharmony_ci	while (count) {
7462306a36Sopenharmony_ci		unsigned bytes = count * 8;
7562306a36Sopenharmony_ci		if (bytes > 0xFFFF8)
7662306a36Sopenharmony_ci			bytes = 0xFFFF8;
7762306a36Sopenharmony_ci
7862306a36Sopenharmony_ci		ib->ptr[ib->length_dw++] = DMA_PACKET(DMA_PACKET_COPY,
7962306a36Sopenharmony_ci						      1, 0, 0, bytes);
8062306a36Sopenharmony_ci		ib->ptr[ib->length_dw++] = lower_32_bits(pe);
8162306a36Sopenharmony_ci		ib->ptr[ib->length_dw++] = lower_32_bits(src);
8262306a36Sopenharmony_ci		ib->ptr[ib->length_dw++] = upper_32_bits(pe) & 0xff;
8362306a36Sopenharmony_ci		ib->ptr[ib->length_dw++] = upper_32_bits(src) & 0xff;
8462306a36Sopenharmony_ci
8562306a36Sopenharmony_ci		pe += bytes;
8662306a36Sopenharmony_ci		src += bytes;
8762306a36Sopenharmony_ci		count -= bytes / 8;
8862306a36Sopenharmony_ci	}
8962306a36Sopenharmony_ci}
9062306a36Sopenharmony_ci
9162306a36Sopenharmony_ci/**
9262306a36Sopenharmony_ci * si_dma_vm_write_pages - update PTEs by writing them manually
9362306a36Sopenharmony_ci *
9462306a36Sopenharmony_ci * @rdev: radeon_device pointer
9562306a36Sopenharmony_ci * @ib: indirect buffer to fill with commands
9662306a36Sopenharmony_ci * @pe: addr of the page entry
9762306a36Sopenharmony_ci * @addr: dst addr to write into pe
9862306a36Sopenharmony_ci * @count: number of page entries to update
9962306a36Sopenharmony_ci * @incr: increase next addr by incr bytes
10062306a36Sopenharmony_ci * @flags: access flags
10162306a36Sopenharmony_ci *
10262306a36Sopenharmony_ci * Update PTEs by writing them manually using the DMA (SI).
10362306a36Sopenharmony_ci */
10462306a36Sopenharmony_civoid si_dma_vm_write_pages(struct radeon_device *rdev,
10562306a36Sopenharmony_ci			   struct radeon_ib *ib,
10662306a36Sopenharmony_ci			   uint64_t pe,
10762306a36Sopenharmony_ci			   uint64_t addr, unsigned count,
10862306a36Sopenharmony_ci			   uint32_t incr, uint32_t flags)
10962306a36Sopenharmony_ci{
11062306a36Sopenharmony_ci	uint64_t value;
11162306a36Sopenharmony_ci	unsigned ndw;
11262306a36Sopenharmony_ci
11362306a36Sopenharmony_ci	while (count) {
11462306a36Sopenharmony_ci		ndw = count * 2;
11562306a36Sopenharmony_ci		if (ndw > 0xFFFFE)
11662306a36Sopenharmony_ci			ndw = 0xFFFFE;
11762306a36Sopenharmony_ci
11862306a36Sopenharmony_ci		/* for non-physically contiguous pages (system) */
11962306a36Sopenharmony_ci		ib->ptr[ib->length_dw++] = DMA_PACKET(DMA_PACKET_WRITE, 0, 0, 0, ndw);
12062306a36Sopenharmony_ci		ib->ptr[ib->length_dw++] = pe;
12162306a36Sopenharmony_ci		ib->ptr[ib->length_dw++] = upper_32_bits(pe) & 0xff;
12262306a36Sopenharmony_ci		for (; ndw > 0; ndw -= 2, --count, pe += 8) {
12362306a36Sopenharmony_ci			if (flags & R600_PTE_SYSTEM) {
12462306a36Sopenharmony_ci				value = radeon_vm_map_gart(rdev, addr);
12562306a36Sopenharmony_ci			} else if (flags & R600_PTE_VALID) {
12662306a36Sopenharmony_ci				value = addr;
12762306a36Sopenharmony_ci			} else {
12862306a36Sopenharmony_ci				value = 0;
12962306a36Sopenharmony_ci			}
13062306a36Sopenharmony_ci			addr += incr;
13162306a36Sopenharmony_ci			value |= flags;
13262306a36Sopenharmony_ci			ib->ptr[ib->length_dw++] = value;
13362306a36Sopenharmony_ci			ib->ptr[ib->length_dw++] = upper_32_bits(value);
13462306a36Sopenharmony_ci		}
13562306a36Sopenharmony_ci	}
13662306a36Sopenharmony_ci}
13762306a36Sopenharmony_ci
13862306a36Sopenharmony_ci/**
13962306a36Sopenharmony_ci * si_dma_vm_set_pages - update the page tables using the DMA
14062306a36Sopenharmony_ci *
14162306a36Sopenharmony_ci * @rdev: radeon_device pointer
14262306a36Sopenharmony_ci * @ib: indirect buffer to fill with commands
14362306a36Sopenharmony_ci * @pe: addr of the page entry
14462306a36Sopenharmony_ci * @addr: dst addr to write into pe
14562306a36Sopenharmony_ci * @count: number of page entries to update
14662306a36Sopenharmony_ci * @incr: increase next addr by incr bytes
14762306a36Sopenharmony_ci * @flags: access flags
14862306a36Sopenharmony_ci *
14962306a36Sopenharmony_ci * Update the page tables using the DMA (SI).
15062306a36Sopenharmony_ci */
15162306a36Sopenharmony_civoid si_dma_vm_set_pages(struct radeon_device *rdev,
15262306a36Sopenharmony_ci			 struct radeon_ib *ib,
15362306a36Sopenharmony_ci			 uint64_t pe,
15462306a36Sopenharmony_ci			 uint64_t addr, unsigned count,
15562306a36Sopenharmony_ci			 uint32_t incr, uint32_t flags)
15662306a36Sopenharmony_ci{
15762306a36Sopenharmony_ci	uint64_t value;
15862306a36Sopenharmony_ci	unsigned ndw;
15962306a36Sopenharmony_ci
16062306a36Sopenharmony_ci	while (count) {
16162306a36Sopenharmony_ci		ndw = count * 2;
16262306a36Sopenharmony_ci		if (ndw > 0xFFFFE)
16362306a36Sopenharmony_ci			ndw = 0xFFFFE;
16462306a36Sopenharmony_ci
16562306a36Sopenharmony_ci		if (flags & R600_PTE_VALID)
16662306a36Sopenharmony_ci			value = addr;
16762306a36Sopenharmony_ci		else
16862306a36Sopenharmony_ci			value = 0;
16962306a36Sopenharmony_ci
17062306a36Sopenharmony_ci		/* for physically contiguous pages (vram) */
17162306a36Sopenharmony_ci		ib->ptr[ib->length_dw++] = DMA_PTE_PDE_PACKET(ndw);
17262306a36Sopenharmony_ci		ib->ptr[ib->length_dw++] = pe; /* dst addr */
17362306a36Sopenharmony_ci		ib->ptr[ib->length_dw++] = upper_32_bits(pe) & 0xff;
17462306a36Sopenharmony_ci		ib->ptr[ib->length_dw++] = flags; /* mask */
17562306a36Sopenharmony_ci		ib->ptr[ib->length_dw++] = 0;
17662306a36Sopenharmony_ci		ib->ptr[ib->length_dw++] = value; /* value */
17762306a36Sopenharmony_ci		ib->ptr[ib->length_dw++] = upper_32_bits(value);
17862306a36Sopenharmony_ci		ib->ptr[ib->length_dw++] = incr; /* increment size */
17962306a36Sopenharmony_ci		ib->ptr[ib->length_dw++] = 0;
18062306a36Sopenharmony_ci		pe += ndw * 4;
18162306a36Sopenharmony_ci		addr += (ndw / 2) * incr;
18262306a36Sopenharmony_ci		count -= ndw / 2;
18362306a36Sopenharmony_ci	}
18462306a36Sopenharmony_ci}
18562306a36Sopenharmony_ci
18662306a36Sopenharmony_civoid si_dma_vm_flush(struct radeon_device *rdev, struct radeon_ring *ring,
18762306a36Sopenharmony_ci		     unsigned vm_id, uint64_t pd_addr)
18862306a36Sopenharmony_ci
18962306a36Sopenharmony_ci{
19062306a36Sopenharmony_ci	radeon_ring_write(ring, DMA_PACKET(DMA_PACKET_SRBM_WRITE, 0, 0, 0, 0));
19162306a36Sopenharmony_ci	if (vm_id < 8) {
19262306a36Sopenharmony_ci		radeon_ring_write(ring, (0xf << 16) | ((VM_CONTEXT0_PAGE_TABLE_BASE_ADDR + (vm_id << 2)) >> 2));
19362306a36Sopenharmony_ci	} else {
19462306a36Sopenharmony_ci		radeon_ring_write(ring, (0xf << 16) | ((VM_CONTEXT8_PAGE_TABLE_BASE_ADDR + ((vm_id - 8) << 2)) >> 2));
19562306a36Sopenharmony_ci	}
19662306a36Sopenharmony_ci	radeon_ring_write(ring, pd_addr >> 12);
19762306a36Sopenharmony_ci
19862306a36Sopenharmony_ci	/* flush hdp cache */
19962306a36Sopenharmony_ci	radeon_ring_write(ring, DMA_PACKET(DMA_PACKET_SRBM_WRITE, 0, 0, 0, 0));
20062306a36Sopenharmony_ci	radeon_ring_write(ring, (0xf << 16) | (HDP_MEM_COHERENCY_FLUSH_CNTL >> 2));
20162306a36Sopenharmony_ci	radeon_ring_write(ring, 1);
20262306a36Sopenharmony_ci
20362306a36Sopenharmony_ci	/* bits 0-7 are the VM contexts0-7 */
20462306a36Sopenharmony_ci	radeon_ring_write(ring, DMA_PACKET(DMA_PACKET_SRBM_WRITE, 0, 0, 0, 0));
20562306a36Sopenharmony_ci	radeon_ring_write(ring, (0xf << 16) | (VM_INVALIDATE_REQUEST >> 2));
20662306a36Sopenharmony_ci	radeon_ring_write(ring, 1 << vm_id);
20762306a36Sopenharmony_ci
20862306a36Sopenharmony_ci	/* wait for invalidate to complete */
20962306a36Sopenharmony_ci	radeon_ring_write(ring, DMA_PACKET(DMA_PACKET_POLL_REG_MEM, 0, 0, 0, 0));
21062306a36Sopenharmony_ci	radeon_ring_write(ring, VM_INVALIDATE_REQUEST);
21162306a36Sopenharmony_ci	radeon_ring_write(ring, 0xff << 16); /* retry */
21262306a36Sopenharmony_ci	radeon_ring_write(ring, 1 << vm_id); /* mask */
21362306a36Sopenharmony_ci	radeon_ring_write(ring, 0); /* value */
21462306a36Sopenharmony_ci	radeon_ring_write(ring, (0 << 28) | 0x20); /* func(always) | poll interval */
21562306a36Sopenharmony_ci}
21662306a36Sopenharmony_ci
21762306a36Sopenharmony_ci/**
21862306a36Sopenharmony_ci * si_copy_dma - copy pages using the DMA engine
21962306a36Sopenharmony_ci *
22062306a36Sopenharmony_ci * @rdev: radeon_device pointer
22162306a36Sopenharmony_ci * @src_offset: src GPU address
22262306a36Sopenharmony_ci * @dst_offset: dst GPU address
22362306a36Sopenharmony_ci * @num_gpu_pages: number of GPU pages to xfer
22462306a36Sopenharmony_ci * @resv: reservation object to sync to
22562306a36Sopenharmony_ci *
22662306a36Sopenharmony_ci * Copy GPU paging using the DMA engine (SI).
22762306a36Sopenharmony_ci * Used by the radeon ttm implementation to move pages if
22862306a36Sopenharmony_ci * registered as the asic copy callback.
22962306a36Sopenharmony_ci */
23062306a36Sopenharmony_cistruct radeon_fence *si_copy_dma(struct radeon_device *rdev,
23162306a36Sopenharmony_ci				 uint64_t src_offset, uint64_t dst_offset,
23262306a36Sopenharmony_ci				 unsigned num_gpu_pages,
23362306a36Sopenharmony_ci				 struct dma_resv *resv)
23462306a36Sopenharmony_ci{
23562306a36Sopenharmony_ci	struct radeon_fence *fence;
23662306a36Sopenharmony_ci	struct radeon_sync sync;
23762306a36Sopenharmony_ci	int ring_index = rdev->asic->copy.dma_ring_index;
23862306a36Sopenharmony_ci	struct radeon_ring *ring = &rdev->ring[ring_index];
23962306a36Sopenharmony_ci	u32 size_in_bytes, cur_size_in_bytes;
24062306a36Sopenharmony_ci	int i, num_loops;
24162306a36Sopenharmony_ci	int r = 0;
24262306a36Sopenharmony_ci
24362306a36Sopenharmony_ci	radeon_sync_create(&sync);
24462306a36Sopenharmony_ci
24562306a36Sopenharmony_ci	size_in_bytes = (num_gpu_pages << RADEON_GPU_PAGE_SHIFT);
24662306a36Sopenharmony_ci	num_loops = DIV_ROUND_UP(size_in_bytes, 0xfffff);
24762306a36Sopenharmony_ci	r = radeon_ring_lock(rdev, ring, num_loops * 5 + 11);
24862306a36Sopenharmony_ci	if (r) {
24962306a36Sopenharmony_ci		DRM_ERROR("radeon: moving bo (%d).\n", r);
25062306a36Sopenharmony_ci		radeon_sync_free(rdev, &sync, NULL);
25162306a36Sopenharmony_ci		return ERR_PTR(r);
25262306a36Sopenharmony_ci	}
25362306a36Sopenharmony_ci
25462306a36Sopenharmony_ci	radeon_sync_resv(rdev, &sync, resv, false);
25562306a36Sopenharmony_ci	radeon_sync_rings(rdev, &sync, ring->idx);
25662306a36Sopenharmony_ci
25762306a36Sopenharmony_ci	for (i = 0; i < num_loops; i++) {
25862306a36Sopenharmony_ci		cur_size_in_bytes = size_in_bytes;
25962306a36Sopenharmony_ci		if (cur_size_in_bytes > 0xFFFFF)
26062306a36Sopenharmony_ci			cur_size_in_bytes = 0xFFFFF;
26162306a36Sopenharmony_ci		size_in_bytes -= cur_size_in_bytes;
26262306a36Sopenharmony_ci		radeon_ring_write(ring, DMA_PACKET(DMA_PACKET_COPY, 1, 0, 0, cur_size_in_bytes));
26362306a36Sopenharmony_ci		radeon_ring_write(ring, lower_32_bits(dst_offset));
26462306a36Sopenharmony_ci		radeon_ring_write(ring, lower_32_bits(src_offset));
26562306a36Sopenharmony_ci		radeon_ring_write(ring, upper_32_bits(dst_offset) & 0xff);
26662306a36Sopenharmony_ci		radeon_ring_write(ring, upper_32_bits(src_offset) & 0xff);
26762306a36Sopenharmony_ci		src_offset += cur_size_in_bytes;
26862306a36Sopenharmony_ci		dst_offset += cur_size_in_bytes;
26962306a36Sopenharmony_ci	}
27062306a36Sopenharmony_ci
27162306a36Sopenharmony_ci	r = radeon_fence_emit(rdev, &fence, ring->idx);
27262306a36Sopenharmony_ci	if (r) {
27362306a36Sopenharmony_ci		radeon_ring_unlock_undo(rdev, ring);
27462306a36Sopenharmony_ci		radeon_sync_free(rdev, &sync, NULL);
27562306a36Sopenharmony_ci		return ERR_PTR(r);
27662306a36Sopenharmony_ci	}
27762306a36Sopenharmony_ci
27862306a36Sopenharmony_ci	radeon_ring_unlock_commit(rdev, ring, false);
27962306a36Sopenharmony_ci	radeon_sync_free(rdev, &sync, fence);
28062306a36Sopenharmony_ci
28162306a36Sopenharmony_ci	return fence;
28262306a36Sopenharmony_ci}
28362306a36Sopenharmony_ci
284