18c2ecf20Sopenharmony_ci/*
28c2ecf20Sopenharmony_ci * Copyright 2015 Advanced Micro Devices, Inc.
38c2ecf20Sopenharmony_ci *
48c2ecf20Sopenharmony_ci * Permission is hereby granted, free of charge, to any person obtaining a
58c2ecf20Sopenharmony_ci * copy of this software and associated documentation files (the "Software"),
68c2ecf20Sopenharmony_ci * to deal in the Software without restriction, including without limitation
78c2ecf20Sopenharmony_ci * the rights to use, copy, modify, merge, publish, distribute, sublicense,
88c2ecf20Sopenharmony_ci * and/or sell copies of the Software, and to permit persons to whom the
98c2ecf20Sopenharmony_ci * Software is furnished to do so, subject to the following conditions:
108c2ecf20Sopenharmony_ci *
118c2ecf20Sopenharmony_ci * The above copyright notice and this permission notice shall be included in
128c2ecf20Sopenharmony_ci * all copies or substantial portions of the Software.
138c2ecf20Sopenharmony_ci *
148c2ecf20Sopenharmony_ci * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
158c2ecf20Sopenharmony_ci * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
168c2ecf20Sopenharmony_ci * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
178c2ecf20Sopenharmony_ci * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
188c2ecf20Sopenharmony_ci * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
198c2ecf20Sopenharmony_ci * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
208c2ecf20Sopenharmony_ci * OTHER DEALINGS IN THE SOFTWARE.
218c2ecf20Sopenharmony_ci *
228c2ecf20Sopenharmony_ci * Authors: Alex Deucher
238c2ecf20Sopenharmony_ci */
248c2ecf20Sopenharmony_ci
258c2ecf20Sopenharmony_ci#include "amdgpu.h"
268c2ecf20Sopenharmony_ci#include "amdgpu_trace.h"
278c2ecf20Sopenharmony_ci#include "si.h"
288c2ecf20Sopenharmony_ci#include "sid.h"
298c2ecf20Sopenharmony_ci
308c2ecf20Sopenharmony_ciconst u32 sdma_offsets[SDMA_MAX_INSTANCE] =
318c2ecf20Sopenharmony_ci{
328c2ecf20Sopenharmony_ci	DMA0_REGISTER_OFFSET,
338c2ecf20Sopenharmony_ci	DMA1_REGISTER_OFFSET
348c2ecf20Sopenharmony_ci};
358c2ecf20Sopenharmony_ci
/* Forward declarations of the setup helpers invoked by si_dma_early_init(). */
static void si_dma_set_ring_funcs(struct amdgpu_device *adev);
static void si_dma_set_buffer_funcs(struct amdgpu_device *adev);
static void si_dma_set_vm_pte_funcs(struct amdgpu_device *adev);
static void si_dma_set_irq_funcs(struct amdgpu_device *adev);
408c2ecf20Sopenharmony_ci
418c2ecf20Sopenharmony_cistatic uint64_t si_dma_ring_get_rptr(struct amdgpu_ring *ring)
428c2ecf20Sopenharmony_ci{
438c2ecf20Sopenharmony_ci	return ring->adev->wb.wb[ring->rptr_offs>>2];
448c2ecf20Sopenharmony_ci}
458c2ecf20Sopenharmony_ci
468c2ecf20Sopenharmony_cistatic uint64_t si_dma_ring_get_wptr(struct amdgpu_ring *ring)
478c2ecf20Sopenharmony_ci{
488c2ecf20Sopenharmony_ci	struct amdgpu_device *adev = ring->adev;
498c2ecf20Sopenharmony_ci	u32 me = (ring == &adev->sdma.instance[0].ring) ? 0 : 1;
508c2ecf20Sopenharmony_ci
518c2ecf20Sopenharmony_ci	return (RREG32(DMA_RB_WPTR + sdma_offsets[me]) & 0x3fffc) >> 2;
528c2ecf20Sopenharmony_ci}
538c2ecf20Sopenharmony_ci
548c2ecf20Sopenharmony_cistatic void si_dma_ring_set_wptr(struct amdgpu_ring *ring)
558c2ecf20Sopenharmony_ci{
568c2ecf20Sopenharmony_ci	struct amdgpu_device *adev = ring->adev;
578c2ecf20Sopenharmony_ci	u32 me = (ring == &adev->sdma.instance[0].ring) ? 0 : 1;
588c2ecf20Sopenharmony_ci
598c2ecf20Sopenharmony_ci	WREG32(DMA_RB_WPTR + sdma_offsets[me],
608c2ecf20Sopenharmony_ci	       (lower_32_bits(ring->wptr) << 2) & 0x3fffc);
618c2ecf20Sopenharmony_ci}
628c2ecf20Sopenharmony_ci
638c2ecf20Sopenharmony_cistatic void si_dma_ring_emit_ib(struct amdgpu_ring *ring,
648c2ecf20Sopenharmony_ci				struct amdgpu_job *job,
658c2ecf20Sopenharmony_ci				struct amdgpu_ib *ib,
668c2ecf20Sopenharmony_ci				uint32_t flags)
678c2ecf20Sopenharmony_ci{
688c2ecf20Sopenharmony_ci	unsigned vmid = AMDGPU_JOB_GET_VMID(job);
698c2ecf20Sopenharmony_ci	/* The indirect buffer packet must end on an 8 DW boundary in the DMA ring.
708c2ecf20Sopenharmony_ci	 * Pad as necessary with NOPs.
718c2ecf20Sopenharmony_ci	 */
728c2ecf20Sopenharmony_ci	while ((lower_32_bits(ring->wptr) & 7) != 5)
738c2ecf20Sopenharmony_ci		amdgpu_ring_write(ring, DMA_PACKET(DMA_PACKET_NOP, 0, 0, 0, 0));
748c2ecf20Sopenharmony_ci	amdgpu_ring_write(ring, DMA_IB_PACKET(DMA_PACKET_INDIRECT_BUFFER, vmid, 0));
758c2ecf20Sopenharmony_ci	amdgpu_ring_write(ring, (ib->gpu_addr & 0xFFFFFFE0));
768c2ecf20Sopenharmony_ci	amdgpu_ring_write(ring, (ib->length_dw << 12) | (upper_32_bits(ib->gpu_addr) & 0xFF));
778c2ecf20Sopenharmony_ci
788c2ecf20Sopenharmony_ci}
798c2ecf20Sopenharmony_ci
808c2ecf20Sopenharmony_ci/**
818c2ecf20Sopenharmony_ci * si_dma_ring_emit_fence - emit a fence on the DMA ring
828c2ecf20Sopenharmony_ci *
838c2ecf20Sopenharmony_ci * @ring: amdgpu ring pointer
848c2ecf20Sopenharmony_ci * @fence: amdgpu fence object
858c2ecf20Sopenharmony_ci *
868c2ecf20Sopenharmony_ci * Add a DMA fence packet to the ring to write
878c2ecf20Sopenharmony_ci * the fence seq number and DMA trap packet to generate
888c2ecf20Sopenharmony_ci * an interrupt if needed (VI).
898c2ecf20Sopenharmony_ci */
908c2ecf20Sopenharmony_cistatic void si_dma_ring_emit_fence(struct amdgpu_ring *ring, u64 addr, u64 seq,
918c2ecf20Sopenharmony_ci				      unsigned flags)
928c2ecf20Sopenharmony_ci{
938c2ecf20Sopenharmony_ci
948c2ecf20Sopenharmony_ci	bool write64bit = flags & AMDGPU_FENCE_FLAG_64BIT;
958c2ecf20Sopenharmony_ci	/* write the fence */
968c2ecf20Sopenharmony_ci	amdgpu_ring_write(ring, DMA_PACKET(DMA_PACKET_FENCE, 0, 0, 0, 0));
978c2ecf20Sopenharmony_ci	amdgpu_ring_write(ring, addr & 0xfffffffc);
988c2ecf20Sopenharmony_ci	amdgpu_ring_write(ring, (upper_32_bits(addr) & 0xff));
998c2ecf20Sopenharmony_ci	amdgpu_ring_write(ring, seq);
1008c2ecf20Sopenharmony_ci	/* optionally write high bits as well */
1018c2ecf20Sopenharmony_ci	if (write64bit) {
1028c2ecf20Sopenharmony_ci		addr += 4;
1038c2ecf20Sopenharmony_ci		amdgpu_ring_write(ring, DMA_PACKET(DMA_PACKET_FENCE, 0, 0, 0, 0));
1048c2ecf20Sopenharmony_ci		amdgpu_ring_write(ring, addr & 0xfffffffc);
1058c2ecf20Sopenharmony_ci		amdgpu_ring_write(ring, (upper_32_bits(addr) & 0xff));
1068c2ecf20Sopenharmony_ci		amdgpu_ring_write(ring, upper_32_bits(seq));
1078c2ecf20Sopenharmony_ci	}
1088c2ecf20Sopenharmony_ci	/* generate an interrupt */
1098c2ecf20Sopenharmony_ci	amdgpu_ring_write(ring, DMA_PACKET(DMA_PACKET_TRAP, 0, 0, 0, 0));
1108c2ecf20Sopenharmony_ci}
1118c2ecf20Sopenharmony_ci
1128c2ecf20Sopenharmony_cistatic void si_dma_stop(struct amdgpu_device *adev)
1138c2ecf20Sopenharmony_ci{
1148c2ecf20Sopenharmony_ci	struct amdgpu_ring *ring;
1158c2ecf20Sopenharmony_ci	u32 rb_cntl;
1168c2ecf20Sopenharmony_ci	unsigned i;
1178c2ecf20Sopenharmony_ci
1188c2ecf20Sopenharmony_ci	for (i = 0; i < adev->sdma.num_instances; i++) {
1198c2ecf20Sopenharmony_ci		ring = &adev->sdma.instance[i].ring;
1208c2ecf20Sopenharmony_ci		/* dma0 */
1218c2ecf20Sopenharmony_ci		rb_cntl = RREG32(DMA_RB_CNTL + sdma_offsets[i]);
1228c2ecf20Sopenharmony_ci		rb_cntl &= ~DMA_RB_ENABLE;
1238c2ecf20Sopenharmony_ci		WREG32(DMA_RB_CNTL + sdma_offsets[i], rb_cntl);
1248c2ecf20Sopenharmony_ci
1258c2ecf20Sopenharmony_ci		if (adev->mman.buffer_funcs_ring == ring)
1268c2ecf20Sopenharmony_ci			amdgpu_ttm_set_buffer_funcs_status(adev, false);
1278c2ecf20Sopenharmony_ci	}
1288c2ecf20Sopenharmony_ci}
1298c2ecf20Sopenharmony_ci
1308c2ecf20Sopenharmony_cistatic int si_dma_start(struct amdgpu_device *adev)
1318c2ecf20Sopenharmony_ci{
1328c2ecf20Sopenharmony_ci	struct amdgpu_ring *ring;
1338c2ecf20Sopenharmony_ci	u32 rb_cntl, dma_cntl, ib_cntl, rb_bufsz;
1348c2ecf20Sopenharmony_ci	int i, r;
1358c2ecf20Sopenharmony_ci	uint64_t rptr_addr;
1368c2ecf20Sopenharmony_ci
1378c2ecf20Sopenharmony_ci	for (i = 0; i < adev->sdma.num_instances; i++) {
1388c2ecf20Sopenharmony_ci		ring = &adev->sdma.instance[i].ring;
1398c2ecf20Sopenharmony_ci
1408c2ecf20Sopenharmony_ci		WREG32(DMA_SEM_INCOMPLETE_TIMER_CNTL + sdma_offsets[i], 0);
1418c2ecf20Sopenharmony_ci		WREG32(DMA_SEM_WAIT_FAIL_TIMER_CNTL + sdma_offsets[i], 0);
1428c2ecf20Sopenharmony_ci
1438c2ecf20Sopenharmony_ci		/* Set ring buffer size in dwords */
1448c2ecf20Sopenharmony_ci		rb_bufsz = order_base_2(ring->ring_size / 4);
1458c2ecf20Sopenharmony_ci		rb_cntl = rb_bufsz << 1;
1468c2ecf20Sopenharmony_ci#ifdef __BIG_ENDIAN
1478c2ecf20Sopenharmony_ci		rb_cntl |= DMA_RB_SWAP_ENABLE | DMA_RPTR_WRITEBACK_SWAP_ENABLE;
1488c2ecf20Sopenharmony_ci#endif
1498c2ecf20Sopenharmony_ci		WREG32(DMA_RB_CNTL + sdma_offsets[i], rb_cntl);
1508c2ecf20Sopenharmony_ci
1518c2ecf20Sopenharmony_ci		/* Initialize the ring buffer's read and write pointers */
1528c2ecf20Sopenharmony_ci		WREG32(DMA_RB_RPTR + sdma_offsets[i], 0);
1538c2ecf20Sopenharmony_ci		WREG32(DMA_RB_WPTR + sdma_offsets[i], 0);
1548c2ecf20Sopenharmony_ci
1558c2ecf20Sopenharmony_ci		rptr_addr = adev->wb.gpu_addr + (ring->rptr_offs * 4);
1568c2ecf20Sopenharmony_ci
1578c2ecf20Sopenharmony_ci		WREG32(DMA_RB_RPTR_ADDR_LO + sdma_offsets[i], lower_32_bits(rptr_addr));
1588c2ecf20Sopenharmony_ci		WREG32(DMA_RB_RPTR_ADDR_HI + sdma_offsets[i], upper_32_bits(rptr_addr) & 0xFF);
1598c2ecf20Sopenharmony_ci
1608c2ecf20Sopenharmony_ci		rb_cntl |= DMA_RPTR_WRITEBACK_ENABLE;
1618c2ecf20Sopenharmony_ci
1628c2ecf20Sopenharmony_ci		WREG32(DMA_RB_BASE + sdma_offsets[i], ring->gpu_addr >> 8);
1638c2ecf20Sopenharmony_ci
1648c2ecf20Sopenharmony_ci		/* enable DMA IBs */
1658c2ecf20Sopenharmony_ci		ib_cntl = DMA_IB_ENABLE | CMD_VMID_FORCE;
1668c2ecf20Sopenharmony_ci#ifdef __BIG_ENDIAN
1678c2ecf20Sopenharmony_ci		ib_cntl |= DMA_IB_SWAP_ENABLE;
1688c2ecf20Sopenharmony_ci#endif
1698c2ecf20Sopenharmony_ci		WREG32(DMA_IB_CNTL + sdma_offsets[i], ib_cntl);
1708c2ecf20Sopenharmony_ci
1718c2ecf20Sopenharmony_ci		dma_cntl = RREG32(DMA_CNTL + sdma_offsets[i]);
1728c2ecf20Sopenharmony_ci		dma_cntl &= ~CTXEMPTY_INT_ENABLE;
1738c2ecf20Sopenharmony_ci		WREG32(DMA_CNTL + sdma_offsets[i], dma_cntl);
1748c2ecf20Sopenharmony_ci
1758c2ecf20Sopenharmony_ci		ring->wptr = 0;
1768c2ecf20Sopenharmony_ci		WREG32(DMA_RB_WPTR + sdma_offsets[i], lower_32_bits(ring->wptr) << 2);
1778c2ecf20Sopenharmony_ci		WREG32(DMA_RB_CNTL + sdma_offsets[i], rb_cntl | DMA_RB_ENABLE);
1788c2ecf20Sopenharmony_ci
1798c2ecf20Sopenharmony_ci		ring->sched.ready = true;
1808c2ecf20Sopenharmony_ci
1818c2ecf20Sopenharmony_ci		r = amdgpu_ring_test_helper(ring);
1828c2ecf20Sopenharmony_ci		if (r)
1838c2ecf20Sopenharmony_ci			return r;
1848c2ecf20Sopenharmony_ci
1858c2ecf20Sopenharmony_ci		if (adev->mman.buffer_funcs_ring == ring)
1868c2ecf20Sopenharmony_ci			amdgpu_ttm_set_buffer_funcs_status(adev, true);
1878c2ecf20Sopenharmony_ci	}
1888c2ecf20Sopenharmony_ci
1898c2ecf20Sopenharmony_ci	return 0;
1908c2ecf20Sopenharmony_ci}
1918c2ecf20Sopenharmony_ci
1928c2ecf20Sopenharmony_ci/**
1938c2ecf20Sopenharmony_ci * si_dma_ring_test_ring - simple async dma engine test
1948c2ecf20Sopenharmony_ci *
1958c2ecf20Sopenharmony_ci * @ring: amdgpu_ring structure holding ring information
1968c2ecf20Sopenharmony_ci *
1978c2ecf20Sopenharmony_ci * Test the DMA engine by writing using it to write an
1988c2ecf20Sopenharmony_ci * value to memory. (VI).
1998c2ecf20Sopenharmony_ci * Returns 0 for success, error for failure.
2008c2ecf20Sopenharmony_ci */
2018c2ecf20Sopenharmony_cistatic int si_dma_ring_test_ring(struct amdgpu_ring *ring)
2028c2ecf20Sopenharmony_ci{
2038c2ecf20Sopenharmony_ci	struct amdgpu_device *adev = ring->adev;
2048c2ecf20Sopenharmony_ci	unsigned i;
2058c2ecf20Sopenharmony_ci	unsigned index;
2068c2ecf20Sopenharmony_ci	int r;
2078c2ecf20Sopenharmony_ci	u32 tmp;
2088c2ecf20Sopenharmony_ci	u64 gpu_addr;
2098c2ecf20Sopenharmony_ci
2108c2ecf20Sopenharmony_ci	r = amdgpu_device_wb_get(adev, &index);
2118c2ecf20Sopenharmony_ci	if (r)
2128c2ecf20Sopenharmony_ci		return r;
2138c2ecf20Sopenharmony_ci
2148c2ecf20Sopenharmony_ci	gpu_addr = adev->wb.gpu_addr + (index * 4);
2158c2ecf20Sopenharmony_ci	tmp = 0xCAFEDEAD;
2168c2ecf20Sopenharmony_ci	adev->wb.wb[index] = cpu_to_le32(tmp);
2178c2ecf20Sopenharmony_ci
2188c2ecf20Sopenharmony_ci	r = amdgpu_ring_alloc(ring, 4);
2198c2ecf20Sopenharmony_ci	if (r)
2208c2ecf20Sopenharmony_ci		goto error_free_wb;
2218c2ecf20Sopenharmony_ci
2228c2ecf20Sopenharmony_ci	amdgpu_ring_write(ring, DMA_PACKET(DMA_PACKET_WRITE, 0, 0, 0, 1));
2238c2ecf20Sopenharmony_ci	amdgpu_ring_write(ring, lower_32_bits(gpu_addr));
2248c2ecf20Sopenharmony_ci	amdgpu_ring_write(ring, upper_32_bits(gpu_addr) & 0xff);
2258c2ecf20Sopenharmony_ci	amdgpu_ring_write(ring, 0xDEADBEEF);
2268c2ecf20Sopenharmony_ci	amdgpu_ring_commit(ring);
2278c2ecf20Sopenharmony_ci
2288c2ecf20Sopenharmony_ci	for (i = 0; i < adev->usec_timeout; i++) {
2298c2ecf20Sopenharmony_ci		tmp = le32_to_cpu(adev->wb.wb[index]);
2308c2ecf20Sopenharmony_ci		if (tmp == 0xDEADBEEF)
2318c2ecf20Sopenharmony_ci			break;
2328c2ecf20Sopenharmony_ci		udelay(1);
2338c2ecf20Sopenharmony_ci	}
2348c2ecf20Sopenharmony_ci
2358c2ecf20Sopenharmony_ci	if (i >= adev->usec_timeout)
2368c2ecf20Sopenharmony_ci		r = -ETIMEDOUT;
2378c2ecf20Sopenharmony_ci
2388c2ecf20Sopenharmony_cierror_free_wb:
2398c2ecf20Sopenharmony_ci	amdgpu_device_wb_free(adev, index);
2408c2ecf20Sopenharmony_ci	return r;
2418c2ecf20Sopenharmony_ci}
2428c2ecf20Sopenharmony_ci
2438c2ecf20Sopenharmony_ci/**
2448c2ecf20Sopenharmony_ci * si_dma_ring_test_ib - test an IB on the DMA engine
2458c2ecf20Sopenharmony_ci *
2468c2ecf20Sopenharmony_ci * @ring: amdgpu_ring structure holding ring information
2478c2ecf20Sopenharmony_ci *
2488c2ecf20Sopenharmony_ci * Test a simple IB in the DMA ring (VI).
2498c2ecf20Sopenharmony_ci * Returns 0 on success, error on failure.
2508c2ecf20Sopenharmony_ci */
2518c2ecf20Sopenharmony_cistatic int si_dma_ring_test_ib(struct amdgpu_ring *ring, long timeout)
2528c2ecf20Sopenharmony_ci{
2538c2ecf20Sopenharmony_ci	struct amdgpu_device *adev = ring->adev;
2548c2ecf20Sopenharmony_ci	struct amdgpu_ib ib;
2558c2ecf20Sopenharmony_ci	struct dma_fence *f = NULL;
2568c2ecf20Sopenharmony_ci	unsigned index;
2578c2ecf20Sopenharmony_ci	u32 tmp = 0;
2588c2ecf20Sopenharmony_ci	u64 gpu_addr;
2598c2ecf20Sopenharmony_ci	long r;
2608c2ecf20Sopenharmony_ci
2618c2ecf20Sopenharmony_ci	r = amdgpu_device_wb_get(adev, &index);
2628c2ecf20Sopenharmony_ci	if (r)
2638c2ecf20Sopenharmony_ci		return r;
2648c2ecf20Sopenharmony_ci
2658c2ecf20Sopenharmony_ci	gpu_addr = adev->wb.gpu_addr + (index * 4);
2668c2ecf20Sopenharmony_ci	tmp = 0xCAFEDEAD;
2678c2ecf20Sopenharmony_ci	adev->wb.wb[index] = cpu_to_le32(tmp);
2688c2ecf20Sopenharmony_ci	memset(&ib, 0, sizeof(ib));
2698c2ecf20Sopenharmony_ci	r = amdgpu_ib_get(adev, NULL, 256,
2708c2ecf20Sopenharmony_ci					AMDGPU_IB_POOL_DIRECT, &ib);
2718c2ecf20Sopenharmony_ci	if (r)
2728c2ecf20Sopenharmony_ci		goto err0;
2738c2ecf20Sopenharmony_ci
2748c2ecf20Sopenharmony_ci	ib.ptr[0] = DMA_PACKET(DMA_PACKET_WRITE, 0, 0, 0, 1);
2758c2ecf20Sopenharmony_ci	ib.ptr[1] = lower_32_bits(gpu_addr);
2768c2ecf20Sopenharmony_ci	ib.ptr[2] = upper_32_bits(gpu_addr) & 0xff;
2778c2ecf20Sopenharmony_ci	ib.ptr[3] = 0xDEADBEEF;
2788c2ecf20Sopenharmony_ci	ib.length_dw = 4;
2798c2ecf20Sopenharmony_ci	r = amdgpu_ib_schedule(ring, 1, &ib, NULL, &f);
2808c2ecf20Sopenharmony_ci	if (r)
2818c2ecf20Sopenharmony_ci		goto err1;
2828c2ecf20Sopenharmony_ci
2838c2ecf20Sopenharmony_ci	r = dma_fence_wait_timeout(f, false, timeout);
2848c2ecf20Sopenharmony_ci	if (r == 0) {
2858c2ecf20Sopenharmony_ci		r = -ETIMEDOUT;
2868c2ecf20Sopenharmony_ci		goto err1;
2878c2ecf20Sopenharmony_ci	} else if (r < 0) {
2888c2ecf20Sopenharmony_ci		goto err1;
2898c2ecf20Sopenharmony_ci	}
2908c2ecf20Sopenharmony_ci	tmp = le32_to_cpu(adev->wb.wb[index]);
2918c2ecf20Sopenharmony_ci	if (tmp == 0xDEADBEEF)
2928c2ecf20Sopenharmony_ci		r = 0;
2938c2ecf20Sopenharmony_ci	else
2948c2ecf20Sopenharmony_ci		r = -EINVAL;
2958c2ecf20Sopenharmony_ci
2968c2ecf20Sopenharmony_cierr1:
2978c2ecf20Sopenharmony_ci	amdgpu_ib_free(adev, &ib, NULL);
2988c2ecf20Sopenharmony_ci	dma_fence_put(f);
2998c2ecf20Sopenharmony_cierr0:
3008c2ecf20Sopenharmony_ci	amdgpu_device_wb_free(adev, index);
3018c2ecf20Sopenharmony_ci	return r;
3028c2ecf20Sopenharmony_ci}
3038c2ecf20Sopenharmony_ci
3048c2ecf20Sopenharmony_ci/**
3058c2ecf20Sopenharmony_ci * cik_dma_vm_copy_pte - update PTEs by copying them from the GART
3068c2ecf20Sopenharmony_ci *
3078c2ecf20Sopenharmony_ci * @ib: indirect buffer to fill with commands
3088c2ecf20Sopenharmony_ci * @pe: addr of the page entry
3098c2ecf20Sopenharmony_ci * @src: src addr to copy from
3108c2ecf20Sopenharmony_ci * @count: number of page entries to update
3118c2ecf20Sopenharmony_ci *
3128c2ecf20Sopenharmony_ci * Update PTEs by copying them from the GART using DMA (SI).
3138c2ecf20Sopenharmony_ci */
3148c2ecf20Sopenharmony_cistatic void si_dma_vm_copy_pte(struct amdgpu_ib *ib,
3158c2ecf20Sopenharmony_ci			       uint64_t pe, uint64_t src,
3168c2ecf20Sopenharmony_ci			       unsigned count)
3178c2ecf20Sopenharmony_ci{
3188c2ecf20Sopenharmony_ci	unsigned bytes = count * 8;
3198c2ecf20Sopenharmony_ci
3208c2ecf20Sopenharmony_ci	ib->ptr[ib->length_dw++] = DMA_PACKET(DMA_PACKET_COPY,
3218c2ecf20Sopenharmony_ci					      1, 0, 0, bytes);
3228c2ecf20Sopenharmony_ci	ib->ptr[ib->length_dw++] = lower_32_bits(pe);
3238c2ecf20Sopenharmony_ci	ib->ptr[ib->length_dw++] = lower_32_bits(src);
3248c2ecf20Sopenharmony_ci	ib->ptr[ib->length_dw++] = upper_32_bits(pe) & 0xff;
3258c2ecf20Sopenharmony_ci	ib->ptr[ib->length_dw++] = upper_32_bits(src) & 0xff;
3268c2ecf20Sopenharmony_ci}
3278c2ecf20Sopenharmony_ci
3288c2ecf20Sopenharmony_ci/**
3298c2ecf20Sopenharmony_ci * si_dma_vm_write_pte - update PTEs by writing them manually
3308c2ecf20Sopenharmony_ci *
3318c2ecf20Sopenharmony_ci * @ib: indirect buffer to fill with commands
3328c2ecf20Sopenharmony_ci * @pe: addr of the page entry
3338c2ecf20Sopenharmony_ci * @value: dst addr to write into pe
3348c2ecf20Sopenharmony_ci * @count: number of page entries to update
3358c2ecf20Sopenharmony_ci * @incr: increase next addr by incr bytes
3368c2ecf20Sopenharmony_ci *
3378c2ecf20Sopenharmony_ci * Update PTEs by writing them manually using DMA (SI).
3388c2ecf20Sopenharmony_ci */
3398c2ecf20Sopenharmony_cistatic void si_dma_vm_write_pte(struct amdgpu_ib *ib, uint64_t pe,
3408c2ecf20Sopenharmony_ci				uint64_t value, unsigned count,
3418c2ecf20Sopenharmony_ci				uint32_t incr)
3428c2ecf20Sopenharmony_ci{
3438c2ecf20Sopenharmony_ci	unsigned ndw = count * 2;
3448c2ecf20Sopenharmony_ci
3458c2ecf20Sopenharmony_ci	ib->ptr[ib->length_dw++] = DMA_PACKET(DMA_PACKET_WRITE, 0, 0, 0, ndw);
3468c2ecf20Sopenharmony_ci	ib->ptr[ib->length_dw++] = lower_32_bits(pe);
3478c2ecf20Sopenharmony_ci	ib->ptr[ib->length_dw++] = upper_32_bits(pe);
3488c2ecf20Sopenharmony_ci	for (; ndw > 0; ndw -= 2) {
3498c2ecf20Sopenharmony_ci		ib->ptr[ib->length_dw++] = lower_32_bits(value);
3508c2ecf20Sopenharmony_ci		ib->ptr[ib->length_dw++] = upper_32_bits(value);
3518c2ecf20Sopenharmony_ci		value += incr;
3528c2ecf20Sopenharmony_ci	}
3538c2ecf20Sopenharmony_ci}
3548c2ecf20Sopenharmony_ci
3558c2ecf20Sopenharmony_ci/**
3568c2ecf20Sopenharmony_ci * si_dma_vm_set_pte_pde - update the page tables using sDMA
3578c2ecf20Sopenharmony_ci *
3588c2ecf20Sopenharmony_ci * @ib: indirect buffer to fill with commands
3598c2ecf20Sopenharmony_ci * @pe: addr of the page entry
3608c2ecf20Sopenharmony_ci * @addr: dst addr to write into pe
3618c2ecf20Sopenharmony_ci * @count: number of page entries to update
3628c2ecf20Sopenharmony_ci * @incr: increase next addr by incr bytes
3638c2ecf20Sopenharmony_ci * @flags: access flags
3648c2ecf20Sopenharmony_ci *
3658c2ecf20Sopenharmony_ci * Update the page tables using sDMA (CIK).
3668c2ecf20Sopenharmony_ci */
3678c2ecf20Sopenharmony_cistatic void si_dma_vm_set_pte_pde(struct amdgpu_ib *ib,
3688c2ecf20Sopenharmony_ci				     uint64_t pe,
3698c2ecf20Sopenharmony_ci				     uint64_t addr, unsigned count,
3708c2ecf20Sopenharmony_ci				     uint32_t incr, uint64_t flags)
3718c2ecf20Sopenharmony_ci{
3728c2ecf20Sopenharmony_ci	uint64_t value;
3738c2ecf20Sopenharmony_ci	unsigned ndw;
3748c2ecf20Sopenharmony_ci
3758c2ecf20Sopenharmony_ci	while (count) {
3768c2ecf20Sopenharmony_ci		ndw = count * 2;
3778c2ecf20Sopenharmony_ci		if (ndw > 0xFFFFE)
3788c2ecf20Sopenharmony_ci			ndw = 0xFFFFE;
3798c2ecf20Sopenharmony_ci
3808c2ecf20Sopenharmony_ci		if (flags & AMDGPU_PTE_VALID)
3818c2ecf20Sopenharmony_ci			value = addr;
3828c2ecf20Sopenharmony_ci		else
3838c2ecf20Sopenharmony_ci			value = 0;
3848c2ecf20Sopenharmony_ci
3858c2ecf20Sopenharmony_ci		/* for physically contiguous pages (vram) */
3868c2ecf20Sopenharmony_ci		ib->ptr[ib->length_dw++] = DMA_PTE_PDE_PACKET(ndw);
3878c2ecf20Sopenharmony_ci		ib->ptr[ib->length_dw++] = pe; /* dst addr */
3888c2ecf20Sopenharmony_ci		ib->ptr[ib->length_dw++] = upper_32_bits(pe) & 0xff;
3898c2ecf20Sopenharmony_ci		ib->ptr[ib->length_dw++] = lower_32_bits(flags); /* mask */
3908c2ecf20Sopenharmony_ci		ib->ptr[ib->length_dw++] = upper_32_bits(flags);
3918c2ecf20Sopenharmony_ci		ib->ptr[ib->length_dw++] = value; /* value */
3928c2ecf20Sopenharmony_ci		ib->ptr[ib->length_dw++] = upper_32_bits(value);
3938c2ecf20Sopenharmony_ci		ib->ptr[ib->length_dw++] = incr; /* increment size */
3948c2ecf20Sopenharmony_ci		ib->ptr[ib->length_dw++] = 0;
3958c2ecf20Sopenharmony_ci		pe += ndw * 4;
3968c2ecf20Sopenharmony_ci		addr += (ndw / 2) * incr;
3978c2ecf20Sopenharmony_ci		count -= ndw / 2;
3988c2ecf20Sopenharmony_ci	}
3998c2ecf20Sopenharmony_ci}
4008c2ecf20Sopenharmony_ci
4018c2ecf20Sopenharmony_ci/**
4028c2ecf20Sopenharmony_ci * si_dma_pad_ib - pad the IB to the required number of dw
4038c2ecf20Sopenharmony_ci *
4048c2ecf20Sopenharmony_ci * @ib: indirect buffer to fill with padding
4058c2ecf20Sopenharmony_ci *
4068c2ecf20Sopenharmony_ci */
4078c2ecf20Sopenharmony_cistatic void si_dma_ring_pad_ib(struct amdgpu_ring *ring, struct amdgpu_ib *ib)
4088c2ecf20Sopenharmony_ci{
4098c2ecf20Sopenharmony_ci	while (ib->length_dw & 0x7)
4108c2ecf20Sopenharmony_ci		ib->ptr[ib->length_dw++] = DMA_PACKET(DMA_PACKET_NOP, 0, 0, 0, 0);
4118c2ecf20Sopenharmony_ci}
4128c2ecf20Sopenharmony_ci
4138c2ecf20Sopenharmony_ci/**
4148c2ecf20Sopenharmony_ci * cik_sdma_ring_emit_pipeline_sync - sync the pipeline
4158c2ecf20Sopenharmony_ci *
4168c2ecf20Sopenharmony_ci * @ring: amdgpu_ring pointer
4178c2ecf20Sopenharmony_ci *
4188c2ecf20Sopenharmony_ci * Make sure all previous operations are completed (CIK).
4198c2ecf20Sopenharmony_ci */
4208c2ecf20Sopenharmony_cistatic void si_dma_ring_emit_pipeline_sync(struct amdgpu_ring *ring)
4218c2ecf20Sopenharmony_ci{
4228c2ecf20Sopenharmony_ci	uint32_t seq = ring->fence_drv.sync_seq;
4238c2ecf20Sopenharmony_ci	uint64_t addr = ring->fence_drv.gpu_addr;
4248c2ecf20Sopenharmony_ci
4258c2ecf20Sopenharmony_ci	/* wait for idle */
4268c2ecf20Sopenharmony_ci	amdgpu_ring_write(ring, DMA_PACKET(DMA_PACKET_POLL_REG_MEM, 0, 0, 0, 0) |
4278c2ecf20Sopenharmony_ci			  (1 << 27)); /* Poll memory */
4288c2ecf20Sopenharmony_ci	amdgpu_ring_write(ring, lower_32_bits(addr));
4298c2ecf20Sopenharmony_ci	amdgpu_ring_write(ring, (0xff << 16) | upper_32_bits(addr)); /* retry, addr_hi */
4308c2ecf20Sopenharmony_ci	amdgpu_ring_write(ring, 0xffffffff); /* mask */
4318c2ecf20Sopenharmony_ci	amdgpu_ring_write(ring, seq); /* value */
4328c2ecf20Sopenharmony_ci	amdgpu_ring_write(ring, (3 << 28) | 0x20); /* func(equal) | poll interval */
4338c2ecf20Sopenharmony_ci}
4348c2ecf20Sopenharmony_ci
4358c2ecf20Sopenharmony_ci/**
4368c2ecf20Sopenharmony_ci * si_dma_ring_emit_vm_flush - cik vm flush using sDMA
4378c2ecf20Sopenharmony_ci *
4388c2ecf20Sopenharmony_ci * @ring: amdgpu_ring pointer
4398c2ecf20Sopenharmony_ci * @vm: amdgpu_vm pointer
4408c2ecf20Sopenharmony_ci *
4418c2ecf20Sopenharmony_ci * Update the page table base and flush the VM TLB
4428c2ecf20Sopenharmony_ci * using sDMA (VI).
4438c2ecf20Sopenharmony_ci */
4448c2ecf20Sopenharmony_cistatic void si_dma_ring_emit_vm_flush(struct amdgpu_ring *ring,
4458c2ecf20Sopenharmony_ci				      unsigned vmid, uint64_t pd_addr)
4468c2ecf20Sopenharmony_ci{
4478c2ecf20Sopenharmony_ci	amdgpu_gmc_emit_flush_gpu_tlb(ring, vmid, pd_addr);
4488c2ecf20Sopenharmony_ci
4498c2ecf20Sopenharmony_ci	/* wait for invalidate to complete */
4508c2ecf20Sopenharmony_ci	amdgpu_ring_write(ring, DMA_PACKET(DMA_PACKET_POLL_REG_MEM, 0, 0, 0, 0));
4518c2ecf20Sopenharmony_ci	amdgpu_ring_write(ring, VM_INVALIDATE_REQUEST);
4528c2ecf20Sopenharmony_ci	amdgpu_ring_write(ring, 0xff << 16); /* retry */
4538c2ecf20Sopenharmony_ci	amdgpu_ring_write(ring, 1 << vmid); /* mask */
4548c2ecf20Sopenharmony_ci	amdgpu_ring_write(ring, 0); /* value */
4558c2ecf20Sopenharmony_ci	amdgpu_ring_write(ring, (0 << 28) | 0x20); /* func(always) | poll interval */
4568c2ecf20Sopenharmony_ci}
4578c2ecf20Sopenharmony_ci
4588c2ecf20Sopenharmony_cistatic void si_dma_ring_emit_wreg(struct amdgpu_ring *ring,
4598c2ecf20Sopenharmony_ci				  uint32_t reg, uint32_t val)
4608c2ecf20Sopenharmony_ci{
4618c2ecf20Sopenharmony_ci	amdgpu_ring_write(ring, DMA_PACKET(DMA_PACKET_SRBM_WRITE, 0, 0, 0, 0));
4628c2ecf20Sopenharmony_ci	amdgpu_ring_write(ring, (0xf << 16) | reg);
4638c2ecf20Sopenharmony_ci	amdgpu_ring_write(ring, val);
4648c2ecf20Sopenharmony_ci}
4658c2ecf20Sopenharmony_ci
4668c2ecf20Sopenharmony_cistatic int si_dma_early_init(void *handle)
4678c2ecf20Sopenharmony_ci{
4688c2ecf20Sopenharmony_ci	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
4698c2ecf20Sopenharmony_ci
4708c2ecf20Sopenharmony_ci	adev->sdma.num_instances = 2;
4718c2ecf20Sopenharmony_ci
4728c2ecf20Sopenharmony_ci	si_dma_set_ring_funcs(adev);
4738c2ecf20Sopenharmony_ci	si_dma_set_buffer_funcs(adev);
4748c2ecf20Sopenharmony_ci	si_dma_set_vm_pte_funcs(adev);
4758c2ecf20Sopenharmony_ci	si_dma_set_irq_funcs(adev);
4768c2ecf20Sopenharmony_ci
4778c2ecf20Sopenharmony_ci	return 0;
4788c2ecf20Sopenharmony_ci}
4798c2ecf20Sopenharmony_ci
4808c2ecf20Sopenharmony_cistatic int si_dma_sw_init(void *handle)
4818c2ecf20Sopenharmony_ci{
4828c2ecf20Sopenharmony_ci	struct amdgpu_ring *ring;
4838c2ecf20Sopenharmony_ci	int r, i;
4848c2ecf20Sopenharmony_ci	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
4858c2ecf20Sopenharmony_ci
4868c2ecf20Sopenharmony_ci	/* DMA0 trap event */
4878c2ecf20Sopenharmony_ci	r = amdgpu_irq_add_id(adev, AMDGPU_IRQ_CLIENTID_LEGACY, 224,
4888c2ecf20Sopenharmony_ci			      &adev->sdma.trap_irq);
4898c2ecf20Sopenharmony_ci	if (r)
4908c2ecf20Sopenharmony_ci		return r;
4918c2ecf20Sopenharmony_ci
4928c2ecf20Sopenharmony_ci	/* DMA1 trap event */
4938c2ecf20Sopenharmony_ci	r = amdgpu_irq_add_id(adev, AMDGPU_IRQ_CLIENTID_LEGACY, 244,
4948c2ecf20Sopenharmony_ci			      &adev->sdma.trap_irq);
4958c2ecf20Sopenharmony_ci	if (r)
4968c2ecf20Sopenharmony_ci		return r;
4978c2ecf20Sopenharmony_ci
4988c2ecf20Sopenharmony_ci	for (i = 0; i < adev->sdma.num_instances; i++) {
4998c2ecf20Sopenharmony_ci		ring = &adev->sdma.instance[i].ring;
5008c2ecf20Sopenharmony_ci		ring->ring_obj = NULL;
5018c2ecf20Sopenharmony_ci		ring->use_doorbell = false;
5028c2ecf20Sopenharmony_ci		sprintf(ring->name, "sdma%d", i);
5038c2ecf20Sopenharmony_ci		r = amdgpu_ring_init(adev, ring, 1024,
5048c2ecf20Sopenharmony_ci				     &adev->sdma.trap_irq,
5058c2ecf20Sopenharmony_ci				     (i == 0) ?
5068c2ecf20Sopenharmony_ci				     AMDGPU_SDMA_IRQ_INSTANCE0 :
5078c2ecf20Sopenharmony_ci				     AMDGPU_SDMA_IRQ_INSTANCE1,
5088c2ecf20Sopenharmony_ci				     AMDGPU_RING_PRIO_DEFAULT);
5098c2ecf20Sopenharmony_ci		if (r)
5108c2ecf20Sopenharmony_ci			return r;
5118c2ecf20Sopenharmony_ci	}
5128c2ecf20Sopenharmony_ci
5138c2ecf20Sopenharmony_ci	return r;
5148c2ecf20Sopenharmony_ci}
5158c2ecf20Sopenharmony_ci
5168c2ecf20Sopenharmony_cistatic int si_dma_sw_fini(void *handle)
5178c2ecf20Sopenharmony_ci{
5188c2ecf20Sopenharmony_ci	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
5198c2ecf20Sopenharmony_ci	int i;
5208c2ecf20Sopenharmony_ci
5218c2ecf20Sopenharmony_ci	for (i = 0; i < adev->sdma.num_instances; i++)
5228c2ecf20Sopenharmony_ci		amdgpu_ring_fini(&adev->sdma.instance[i].ring);
5238c2ecf20Sopenharmony_ci
5248c2ecf20Sopenharmony_ci	return 0;
5258c2ecf20Sopenharmony_ci}
5268c2ecf20Sopenharmony_ci
/**
 * si_dma_hw_init - bring up the DMA engines
 *
 * @handle: amdgpu_device pointer
 *
 * Returns the result of si_dma_start().
 */
static int si_dma_hw_init(void *handle)
{
	struct amdgpu_device *adev = handle;
	int r = si_dma_start(adev);

	return r;
}
5338c2ecf20Sopenharmony_ci
/**
 * si_dma_hw_fini - shut down the DMA engines
 *
 * @handle: amdgpu_device pointer
 *
 * Stops all DMA engines through si_dma_stop().
 *
 * Returns 0.
 */
static int si_dma_hw_fini(void *handle)
{
	struct amdgpu_device *adev = handle;

	si_dma_stop(adev);
	return 0;
}
5428c2ecf20Sopenharmony_ci
/**
 * si_dma_suspend - suspend the DMA IP block
 *
 * @handle: amdgpu_device pointer
 *
 * Returns the result of si_dma_hw_fini().
 */
static int si_dma_suspend(void *handle)
{
	struct amdgpu_device *adev = handle;
	int r = si_dma_hw_fini(adev);

	return r;
}
5498c2ecf20Sopenharmony_ci
/**
 * si_dma_resume - resume the DMA IP block
 *
 * @handle: amdgpu_device pointer
 *
 * Returns the result of si_dma_hw_init().
 */
static int si_dma_resume(void *handle)
{
	struct amdgpu_device *adev = handle;
	int r = si_dma_hw_init(adev);

	return r;
}
5568c2ecf20Sopenharmony_ci
5578c2ecf20Sopenharmony_cistatic bool si_dma_is_idle(void *handle)
5588c2ecf20Sopenharmony_ci{
5598c2ecf20Sopenharmony_ci	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
5608c2ecf20Sopenharmony_ci	u32 tmp = RREG32(SRBM_STATUS2);
5618c2ecf20Sopenharmony_ci
5628c2ecf20Sopenharmony_ci	if (tmp & (DMA_BUSY_MASK | DMA1_BUSY_MASK))
5638c2ecf20Sopenharmony_ci	    return false;
5648c2ecf20Sopenharmony_ci
5658c2ecf20Sopenharmony_ci	return true;
5668c2ecf20Sopenharmony_ci}
5678c2ecf20Sopenharmony_ci
5688c2ecf20Sopenharmony_cistatic int si_dma_wait_for_idle(void *handle)
5698c2ecf20Sopenharmony_ci{
5708c2ecf20Sopenharmony_ci	unsigned i;
5718c2ecf20Sopenharmony_ci	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
5728c2ecf20Sopenharmony_ci
5738c2ecf20Sopenharmony_ci	for (i = 0; i < adev->usec_timeout; i++) {
5748c2ecf20Sopenharmony_ci		if (si_dma_is_idle(handle))
5758c2ecf20Sopenharmony_ci			return 0;
5768c2ecf20Sopenharmony_ci		udelay(1);
5778c2ecf20Sopenharmony_ci	}
5788c2ecf20Sopenharmony_ci	return -ETIMEDOUT;
5798c2ecf20Sopenharmony_ci}
5808c2ecf20Sopenharmony_ci
/**
 * si_dma_soft_reset - soft reset hook of the DMA IP block
 *
 * @handle: amdgpu_device pointer (not used)
 *
 * No reset is performed; the function only logs that it is not implemented.
 *
 * Returns 0.
 */
static int si_dma_soft_reset(void *handle)
{
	/* placeholder: report the missing implementation and claim success */
	DRM_INFO("si_dma_soft_reset --- not implemented !!!!!!!\n");

	return 0;
}
5868c2ecf20Sopenharmony_ci
5878c2ecf20Sopenharmony_cistatic int si_dma_set_trap_irq_state(struct amdgpu_device *adev,
5888c2ecf20Sopenharmony_ci					struct amdgpu_irq_src *src,
5898c2ecf20Sopenharmony_ci					unsigned type,
5908c2ecf20Sopenharmony_ci					enum amdgpu_interrupt_state state)
5918c2ecf20Sopenharmony_ci{
5928c2ecf20Sopenharmony_ci	u32 sdma_cntl;
5938c2ecf20Sopenharmony_ci
5948c2ecf20Sopenharmony_ci	switch (type) {
5958c2ecf20Sopenharmony_ci	case AMDGPU_SDMA_IRQ_INSTANCE0:
5968c2ecf20Sopenharmony_ci		switch (state) {
5978c2ecf20Sopenharmony_ci		case AMDGPU_IRQ_STATE_DISABLE:
5988c2ecf20Sopenharmony_ci			sdma_cntl = RREG32(DMA_CNTL + DMA0_REGISTER_OFFSET);
5998c2ecf20Sopenharmony_ci			sdma_cntl &= ~TRAP_ENABLE;
6008c2ecf20Sopenharmony_ci			WREG32(DMA_CNTL + DMA0_REGISTER_OFFSET, sdma_cntl);
6018c2ecf20Sopenharmony_ci			break;
6028c2ecf20Sopenharmony_ci		case AMDGPU_IRQ_STATE_ENABLE:
6038c2ecf20Sopenharmony_ci			sdma_cntl = RREG32(DMA_CNTL + DMA0_REGISTER_OFFSET);
6048c2ecf20Sopenharmony_ci			sdma_cntl |= TRAP_ENABLE;
6058c2ecf20Sopenharmony_ci			WREG32(DMA_CNTL + DMA0_REGISTER_OFFSET, sdma_cntl);
6068c2ecf20Sopenharmony_ci			break;
6078c2ecf20Sopenharmony_ci		default:
6088c2ecf20Sopenharmony_ci			break;
6098c2ecf20Sopenharmony_ci		}
6108c2ecf20Sopenharmony_ci		break;
6118c2ecf20Sopenharmony_ci	case AMDGPU_SDMA_IRQ_INSTANCE1:
6128c2ecf20Sopenharmony_ci		switch (state) {
6138c2ecf20Sopenharmony_ci		case AMDGPU_IRQ_STATE_DISABLE:
6148c2ecf20Sopenharmony_ci			sdma_cntl = RREG32(DMA_CNTL + DMA1_REGISTER_OFFSET);
6158c2ecf20Sopenharmony_ci			sdma_cntl &= ~TRAP_ENABLE;
6168c2ecf20Sopenharmony_ci			WREG32(DMA_CNTL + DMA1_REGISTER_OFFSET, sdma_cntl);
6178c2ecf20Sopenharmony_ci			break;
6188c2ecf20Sopenharmony_ci		case AMDGPU_IRQ_STATE_ENABLE:
6198c2ecf20Sopenharmony_ci			sdma_cntl = RREG32(DMA_CNTL + DMA1_REGISTER_OFFSET);
6208c2ecf20Sopenharmony_ci			sdma_cntl |= TRAP_ENABLE;
6218c2ecf20Sopenharmony_ci			WREG32(DMA_CNTL + DMA1_REGISTER_OFFSET, sdma_cntl);
6228c2ecf20Sopenharmony_ci			break;
6238c2ecf20Sopenharmony_ci		default:
6248c2ecf20Sopenharmony_ci			break;
6258c2ecf20Sopenharmony_ci		}
6268c2ecf20Sopenharmony_ci		break;
6278c2ecf20Sopenharmony_ci	default:
6288c2ecf20Sopenharmony_ci		break;
6298c2ecf20Sopenharmony_ci	}
6308c2ecf20Sopenharmony_ci	return 0;
6318c2ecf20Sopenharmony_ci}
6328c2ecf20Sopenharmony_ci
6338c2ecf20Sopenharmony_cistatic int si_dma_process_trap_irq(struct amdgpu_device *adev,
6348c2ecf20Sopenharmony_ci				      struct amdgpu_irq_src *source,
6358c2ecf20Sopenharmony_ci				      struct amdgpu_iv_entry *entry)
6368c2ecf20Sopenharmony_ci{
6378c2ecf20Sopenharmony_ci	if (entry->src_id == 224)
6388c2ecf20Sopenharmony_ci		amdgpu_fence_process(&adev->sdma.instance[0].ring);
6398c2ecf20Sopenharmony_ci	else
6408c2ecf20Sopenharmony_ci		amdgpu_fence_process(&adev->sdma.instance[1].ring);
6418c2ecf20Sopenharmony_ci	return 0;
6428c2ecf20Sopenharmony_ci}
6438c2ecf20Sopenharmony_ci
6448c2ecf20Sopenharmony_cistatic int si_dma_set_clockgating_state(void *handle,
6458c2ecf20Sopenharmony_ci					  enum amd_clockgating_state state)
6468c2ecf20Sopenharmony_ci{
6478c2ecf20Sopenharmony_ci	u32 orig, data, offset;
6488c2ecf20Sopenharmony_ci	int i;
6498c2ecf20Sopenharmony_ci	bool enable;
6508c2ecf20Sopenharmony_ci	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
6518c2ecf20Sopenharmony_ci
6528c2ecf20Sopenharmony_ci	enable = (state == AMD_CG_STATE_GATE);
6538c2ecf20Sopenharmony_ci
6548c2ecf20Sopenharmony_ci	if (enable && (adev->cg_flags & AMD_CG_SUPPORT_SDMA_MGCG)) {
6558c2ecf20Sopenharmony_ci		for (i = 0; i < adev->sdma.num_instances; i++) {
6568c2ecf20Sopenharmony_ci			if (i == 0)
6578c2ecf20Sopenharmony_ci				offset = DMA0_REGISTER_OFFSET;
6588c2ecf20Sopenharmony_ci			else
6598c2ecf20Sopenharmony_ci				offset = DMA1_REGISTER_OFFSET;
6608c2ecf20Sopenharmony_ci			orig = data = RREG32(DMA_POWER_CNTL + offset);
6618c2ecf20Sopenharmony_ci			data &= ~MEM_POWER_OVERRIDE;
6628c2ecf20Sopenharmony_ci			if (data != orig)
6638c2ecf20Sopenharmony_ci				WREG32(DMA_POWER_CNTL + offset, data);
6648c2ecf20Sopenharmony_ci			WREG32(DMA_CLK_CTRL + offset, 0x00000100);
6658c2ecf20Sopenharmony_ci		}
6668c2ecf20Sopenharmony_ci	} else {
6678c2ecf20Sopenharmony_ci		for (i = 0; i < adev->sdma.num_instances; i++) {
6688c2ecf20Sopenharmony_ci			if (i == 0)
6698c2ecf20Sopenharmony_ci				offset = DMA0_REGISTER_OFFSET;
6708c2ecf20Sopenharmony_ci			else
6718c2ecf20Sopenharmony_ci				offset = DMA1_REGISTER_OFFSET;
6728c2ecf20Sopenharmony_ci			orig = data = RREG32(DMA_POWER_CNTL + offset);
6738c2ecf20Sopenharmony_ci			data |= MEM_POWER_OVERRIDE;
6748c2ecf20Sopenharmony_ci			if (data != orig)
6758c2ecf20Sopenharmony_ci				WREG32(DMA_POWER_CNTL + offset, data);
6768c2ecf20Sopenharmony_ci
6778c2ecf20Sopenharmony_ci			orig = data = RREG32(DMA_CLK_CTRL + offset);
6788c2ecf20Sopenharmony_ci			data = 0xff000000;
6798c2ecf20Sopenharmony_ci			if (data != orig)
6808c2ecf20Sopenharmony_ci				WREG32(DMA_CLK_CTRL + offset, data);
6818c2ecf20Sopenharmony_ci		}
6828c2ecf20Sopenharmony_ci	}
6838c2ecf20Sopenharmony_ci
6848c2ecf20Sopenharmony_ci	return 0;
6858c2ecf20Sopenharmony_ci}
6868c2ecf20Sopenharmony_ci
6878c2ecf20Sopenharmony_cistatic int si_dma_set_powergating_state(void *handle,
6888c2ecf20Sopenharmony_ci					  enum amd_powergating_state state)
6898c2ecf20Sopenharmony_ci{
6908c2ecf20Sopenharmony_ci	u32 tmp;
6918c2ecf20Sopenharmony_ci
6928c2ecf20Sopenharmony_ci	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
6938c2ecf20Sopenharmony_ci
6948c2ecf20Sopenharmony_ci	WREG32(DMA_PGFSM_WRITE,  0x00002000);
6958c2ecf20Sopenharmony_ci	WREG32(DMA_PGFSM_CONFIG, 0x100010ff);
6968c2ecf20Sopenharmony_ci
6978c2ecf20Sopenharmony_ci	for (tmp = 0; tmp < 5; tmp++)
6988c2ecf20Sopenharmony_ci		WREG32(DMA_PGFSM_WRITE, 0);
6998c2ecf20Sopenharmony_ci
7008c2ecf20Sopenharmony_ci	return 0;
7018c2ecf20Sopenharmony_ci}
7028c2ecf20Sopenharmony_ci
7038c2ecf20Sopenharmony_cistatic const struct amd_ip_funcs si_dma_ip_funcs = {
7048c2ecf20Sopenharmony_ci	.name = "si_dma",
7058c2ecf20Sopenharmony_ci	.early_init = si_dma_early_init,
7068c2ecf20Sopenharmony_ci	.late_init = NULL,
7078c2ecf20Sopenharmony_ci	.sw_init = si_dma_sw_init,
7088c2ecf20Sopenharmony_ci	.sw_fini = si_dma_sw_fini,
7098c2ecf20Sopenharmony_ci	.hw_init = si_dma_hw_init,
7108c2ecf20Sopenharmony_ci	.hw_fini = si_dma_hw_fini,
7118c2ecf20Sopenharmony_ci	.suspend = si_dma_suspend,
7128c2ecf20Sopenharmony_ci	.resume = si_dma_resume,
7138c2ecf20Sopenharmony_ci	.is_idle = si_dma_is_idle,
7148c2ecf20Sopenharmony_ci	.wait_for_idle = si_dma_wait_for_idle,
7158c2ecf20Sopenharmony_ci	.soft_reset = si_dma_soft_reset,
7168c2ecf20Sopenharmony_ci	.set_clockgating_state = si_dma_set_clockgating_state,
7178c2ecf20Sopenharmony_ci	.set_powergating_state = si_dma_set_powergating_state,
7188c2ecf20Sopenharmony_ci};
7198c2ecf20Sopenharmony_ci
7208c2ecf20Sopenharmony_cistatic const struct amdgpu_ring_funcs si_dma_ring_funcs = {
7218c2ecf20Sopenharmony_ci	.type = AMDGPU_RING_TYPE_SDMA,
7228c2ecf20Sopenharmony_ci	.align_mask = 0xf,
7238c2ecf20Sopenharmony_ci	.nop = DMA_PACKET(DMA_PACKET_NOP, 0, 0, 0, 0),
7248c2ecf20Sopenharmony_ci	.support_64bit_ptrs = false,
7258c2ecf20Sopenharmony_ci	.get_rptr = si_dma_ring_get_rptr,
7268c2ecf20Sopenharmony_ci	.get_wptr = si_dma_ring_get_wptr,
7278c2ecf20Sopenharmony_ci	.set_wptr = si_dma_ring_set_wptr,
7288c2ecf20Sopenharmony_ci	.emit_frame_size =
7298c2ecf20Sopenharmony_ci		3 + 3 + /* hdp flush / invalidate */
7308c2ecf20Sopenharmony_ci		6 + /* si_dma_ring_emit_pipeline_sync */
7318c2ecf20Sopenharmony_ci		SI_FLUSH_GPU_TLB_NUM_WREG * 3 + 6 + /* si_dma_ring_emit_vm_flush */
7328c2ecf20Sopenharmony_ci		9 + 9 + 9, /* si_dma_ring_emit_fence x3 for user fence, vm fence */
7338c2ecf20Sopenharmony_ci	.emit_ib_size = 7 + 3, /* si_dma_ring_emit_ib */
7348c2ecf20Sopenharmony_ci	.emit_ib = si_dma_ring_emit_ib,
7358c2ecf20Sopenharmony_ci	.emit_fence = si_dma_ring_emit_fence,
7368c2ecf20Sopenharmony_ci	.emit_pipeline_sync = si_dma_ring_emit_pipeline_sync,
7378c2ecf20Sopenharmony_ci	.emit_vm_flush = si_dma_ring_emit_vm_flush,
7388c2ecf20Sopenharmony_ci	.test_ring = si_dma_ring_test_ring,
7398c2ecf20Sopenharmony_ci	.test_ib = si_dma_ring_test_ib,
7408c2ecf20Sopenharmony_ci	.insert_nop = amdgpu_ring_insert_nop,
7418c2ecf20Sopenharmony_ci	.pad_ib = si_dma_ring_pad_ib,
7428c2ecf20Sopenharmony_ci	.emit_wreg = si_dma_ring_emit_wreg,
7438c2ecf20Sopenharmony_ci};
7448c2ecf20Sopenharmony_ci
7458c2ecf20Sopenharmony_cistatic void si_dma_set_ring_funcs(struct amdgpu_device *adev)
7468c2ecf20Sopenharmony_ci{
7478c2ecf20Sopenharmony_ci	int i;
7488c2ecf20Sopenharmony_ci
7498c2ecf20Sopenharmony_ci	for (i = 0; i < adev->sdma.num_instances; i++)
7508c2ecf20Sopenharmony_ci		adev->sdma.instance[i].ring.funcs = &si_dma_ring_funcs;
7518c2ecf20Sopenharmony_ci}
7528c2ecf20Sopenharmony_ci
7538c2ecf20Sopenharmony_cistatic const struct amdgpu_irq_src_funcs si_dma_trap_irq_funcs = {
7548c2ecf20Sopenharmony_ci	.set = si_dma_set_trap_irq_state,
7558c2ecf20Sopenharmony_ci	.process = si_dma_process_trap_irq,
7568c2ecf20Sopenharmony_ci};
7578c2ecf20Sopenharmony_ci
7588c2ecf20Sopenharmony_cistatic void si_dma_set_irq_funcs(struct amdgpu_device *adev)
7598c2ecf20Sopenharmony_ci{
7608c2ecf20Sopenharmony_ci	adev->sdma.trap_irq.num_types = AMDGPU_SDMA_IRQ_LAST;
7618c2ecf20Sopenharmony_ci	adev->sdma.trap_irq.funcs = &si_dma_trap_irq_funcs;
7628c2ecf20Sopenharmony_ci}
7638c2ecf20Sopenharmony_ci
7648c2ecf20Sopenharmony_ci/**
7658c2ecf20Sopenharmony_ci * si_dma_emit_copy_buffer - copy buffer using the sDMA engine
7668c2ecf20Sopenharmony_ci *
7678c2ecf20Sopenharmony_ci * @ring: amdgpu_ring structure holding ring information
7688c2ecf20Sopenharmony_ci * @src_offset: src GPU address
7698c2ecf20Sopenharmony_ci * @dst_offset: dst GPU address
7708c2ecf20Sopenharmony_ci * @byte_count: number of bytes to xfer
7718c2ecf20Sopenharmony_ci *
7728c2ecf20Sopenharmony_ci * Copy GPU buffers using the DMA engine (VI).
7738c2ecf20Sopenharmony_ci * Used by the amdgpu ttm implementation to move pages if
7748c2ecf20Sopenharmony_ci * registered as the asic copy callback.
7758c2ecf20Sopenharmony_ci */
7768c2ecf20Sopenharmony_cistatic void si_dma_emit_copy_buffer(struct amdgpu_ib *ib,
7778c2ecf20Sopenharmony_ci				       uint64_t src_offset,
7788c2ecf20Sopenharmony_ci				       uint64_t dst_offset,
7798c2ecf20Sopenharmony_ci				       uint32_t byte_count,
7808c2ecf20Sopenharmony_ci				       bool tmz)
7818c2ecf20Sopenharmony_ci{
7828c2ecf20Sopenharmony_ci	ib->ptr[ib->length_dw++] = DMA_PACKET(DMA_PACKET_COPY,
7838c2ecf20Sopenharmony_ci					      1, 0, 0, byte_count);
7848c2ecf20Sopenharmony_ci	ib->ptr[ib->length_dw++] = lower_32_bits(dst_offset);
7858c2ecf20Sopenharmony_ci	ib->ptr[ib->length_dw++] = lower_32_bits(src_offset);
7868c2ecf20Sopenharmony_ci	ib->ptr[ib->length_dw++] = upper_32_bits(dst_offset) & 0xff;
7878c2ecf20Sopenharmony_ci	ib->ptr[ib->length_dw++] = upper_32_bits(src_offset) & 0xff;
7888c2ecf20Sopenharmony_ci}
7898c2ecf20Sopenharmony_ci
7908c2ecf20Sopenharmony_ci/**
7918c2ecf20Sopenharmony_ci * si_dma_emit_fill_buffer - fill buffer using the sDMA engine
7928c2ecf20Sopenharmony_ci *
7938c2ecf20Sopenharmony_ci * @ring: amdgpu_ring structure holding ring information
7948c2ecf20Sopenharmony_ci * @src_data: value to write to buffer
7958c2ecf20Sopenharmony_ci * @dst_offset: dst GPU address
7968c2ecf20Sopenharmony_ci * @byte_count: number of bytes to xfer
7978c2ecf20Sopenharmony_ci *
7988c2ecf20Sopenharmony_ci * Fill GPU buffers using the DMA engine (VI).
7998c2ecf20Sopenharmony_ci */
8008c2ecf20Sopenharmony_cistatic void si_dma_emit_fill_buffer(struct amdgpu_ib *ib,
8018c2ecf20Sopenharmony_ci				       uint32_t src_data,
8028c2ecf20Sopenharmony_ci				       uint64_t dst_offset,
8038c2ecf20Sopenharmony_ci				       uint32_t byte_count)
8048c2ecf20Sopenharmony_ci{
8058c2ecf20Sopenharmony_ci	ib->ptr[ib->length_dw++] = DMA_PACKET(DMA_PACKET_CONSTANT_FILL,
8068c2ecf20Sopenharmony_ci					      0, 0, 0, byte_count / 4);
8078c2ecf20Sopenharmony_ci	ib->ptr[ib->length_dw++] = lower_32_bits(dst_offset);
8088c2ecf20Sopenharmony_ci	ib->ptr[ib->length_dw++] = src_data;
8098c2ecf20Sopenharmony_ci	ib->ptr[ib->length_dw++] = upper_32_bits(dst_offset) << 16;
8108c2ecf20Sopenharmony_ci}
8118c2ecf20Sopenharmony_ci
8128c2ecf20Sopenharmony_ci
8138c2ecf20Sopenharmony_cistatic const struct amdgpu_buffer_funcs si_dma_buffer_funcs = {
8148c2ecf20Sopenharmony_ci	.copy_max_bytes = 0xffff8,
8158c2ecf20Sopenharmony_ci	.copy_num_dw = 5,
8168c2ecf20Sopenharmony_ci	.emit_copy_buffer = si_dma_emit_copy_buffer,
8178c2ecf20Sopenharmony_ci
8188c2ecf20Sopenharmony_ci	.fill_max_bytes = 0xffff8,
8198c2ecf20Sopenharmony_ci	.fill_num_dw = 4,
8208c2ecf20Sopenharmony_ci	.emit_fill_buffer = si_dma_emit_fill_buffer,
8218c2ecf20Sopenharmony_ci};
8228c2ecf20Sopenharmony_ci
8238c2ecf20Sopenharmony_cistatic void si_dma_set_buffer_funcs(struct amdgpu_device *adev)
8248c2ecf20Sopenharmony_ci{
8258c2ecf20Sopenharmony_ci	adev->mman.buffer_funcs = &si_dma_buffer_funcs;
8268c2ecf20Sopenharmony_ci	adev->mman.buffer_funcs_ring = &adev->sdma.instance[0].ring;
8278c2ecf20Sopenharmony_ci}
8288c2ecf20Sopenharmony_ci
8298c2ecf20Sopenharmony_cistatic const struct amdgpu_vm_pte_funcs si_dma_vm_pte_funcs = {
8308c2ecf20Sopenharmony_ci	.copy_pte_num_dw = 5,
8318c2ecf20Sopenharmony_ci	.copy_pte = si_dma_vm_copy_pte,
8328c2ecf20Sopenharmony_ci
8338c2ecf20Sopenharmony_ci	.write_pte = si_dma_vm_write_pte,
8348c2ecf20Sopenharmony_ci	.set_pte_pde = si_dma_vm_set_pte_pde,
8358c2ecf20Sopenharmony_ci};
8368c2ecf20Sopenharmony_ci
8378c2ecf20Sopenharmony_cistatic void si_dma_set_vm_pte_funcs(struct amdgpu_device *adev)
8388c2ecf20Sopenharmony_ci{
8398c2ecf20Sopenharmony_ci	unsigned i;
8408c2ecf20Sopenharmony_ci
8418c2ecf20Sopenharmony_ci	adev->vm_manager.vm_pte_funcs = &si_dma_vm_pte_funcs;
8428c2ecf20Sopenharmony_ci	for (i = 0; i < adev->sdma.num_instances; i++) {
8438c2ecf20Sopenharmony_ci		adev->vm_manager.vm_pte_scheds[i] =
8448c2ecf20Sopenharmony_ci			&adev->sdma.instance[i].ring.sched;
8458c2ecf20Sopenharmony_ci	}
8468c2ecf20Sopenharmony_ci	adev->vm_manager.vm_pte_num_scheds = adev->sdma.num_instances;
8478c2ecf20Sopenharmony_ci}
8488c2ecf20Sopenharmony_ci
8498c2ecf20Sopenharmony_ciconst struct amdgpu_ip_block_version si_dma_ip_block =
8508c2ecf20Sopenharmony_ci{
8518c2ecf20Sopenharmony_ci	.type = AMD_IP_BLOCK_TYPE_SDMA,
8528c2ecf20Sopenharmony_ci	.major = 1,
8538c2ecf20Sopenharmony_ci	.minor = 0,
8548c2ecf20Sopenharmony_ci	.rev = 0,
8558c2ecf20Sopenharmony_ci	.funcs = &si_dma_ip_funcs,
8568c2ecf20Sopenharmony_ci};
857