18c2ecf20Sopenharmony_ci/* 28c2ecf20Sopenharmony_ci * Copyright 2015 Advanced Micro Devices, Inc. 38c2ecf20Sopenharmony_ci * 48c2ecf20Sopenharmony_ci * Permission is hereby granted, free of charge, to any person obtaining a 58c2ecf20Sopenharmony_ci * copy of this software and associated documentation files (the "Software"), 68c2ecf20Sopenharmony_ci * to deal in the Software without restriction, including without limitation 78c2ecf20Sopenharmony_ci * the rights to use, copy, modify, merge, publish, distribute, sublicense, 88c2ecf20Sopenharmony_ci * and/or sell copies of the Software, and to permit persons to whom the 98c2ecf20Sopenharmony_ci * Software is furnished to do so, subject to the following conditions: 108c2ecf20Sopenharmony_ci * 118c2ecf20Sopenharmony_ci * The above copyright notice and this permission notice shall be included in 128c2ecf20Sopenharmony_ci * all copies or substantial portions of the Software. 138c2ecf20Sopenharmony_ci * 148c2ecf20Sopenharmony_ci * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 158c2ecf20Sopenharmony_ci * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 168c2ecf20Sopenharmony_ci * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 178c2ecf20Sopenharmony_ci * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR 188c2ecf20Sopenharmony_ci * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, 198c2ecf20Sopenharmony_ci * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR 208c2ecf20Sopenharmony_ci * OTHER DEALINGS IN THE SOFTWARE. 218c2ecf20Sopenharmony_ci * 228c2ecf20Sopenharmony_ci * Authors: Alex Deucher 238c2ecf20Sopenharmony_ci */ 248c2ecf20Sopenharmony_ci 258c2ecf20Sopenharmony_ci#include "amdgpu.h" 268c2ecf20Sopenharmony_ci#include "amdgpu_trace.h" 278c2ecf20Sopenharmony_ci#include "si.h" 288c2ecf20Sopenharmony_ci#include "sid.h" 298c2ecf20Sopenharmony_ci 308c2ecf20Sopenharmony_ciconst u32 sdma_offsets[SDMA_MAX_INSTANCE] = 318c2ecf20Sopenharmony_ci{ 328c2ecf20Sopenharmony_ci DMA0_REGISTER_OFFSET, 338c2ecf20Sopenharmony_ci DMA1_REGISTER_OFFSET 348c2ecf20Sopenharmony_ci}; 358c2ecf20Sopenharmony_ci 368c2ecf20Sopenharmony_cistatic void si_dma_set_ring_funcs(struct amdgpu_device *adev); 378c2ecf20Sopenharmony_cistatic void si_dma_set_buffer_funcs(struct amdgpu_device *adev); 388c2ecf20Sopenharmony_cistatic void si_dma_set_vm_pte_funcs(struct amdgpu_device *adev); 398c2ecf20Sopenharmony_cistatic void si_dma_set_irq_funcs(struct amdgpu_device *adev); 408c2ecf20Sopenharmony_ci 418c2ecf20Sopenharmony_cistatic uint64_t si_dma_ring_get_rptr(struct amdgpu_ring *ring) 428c2ecf20Sopenharmony_ci{ 438c2ecf20Sopenharmony_ci return ring->adev->wb.wb[ring->rptr_offs>>2]; 448c2ecf20Sopenharmony_ci} 458c2ecf20Sopenharmony_ci 468c2ecf20Sopenharmony_cistatic uint64_t si_dma_ring_get_wptr(struct amdgpu_ring *ring) 478c2ecf20Sopenharmony_ci{ 488c2ecf20Sopenharmony_ci struct amdgpu_device *adev = ring->adev; 498c2ecf20Sopenharmony_ci u32 me = (ring == &adev->sdma.instance[0].ring) ? 0 : 1; 508c2ecf20Sopenharmony_ci 518c2ecf20Sopenharmony_ci return (RREG32(DMA_RB_WPTR + sdma_offsets[me]) & 0x3fffc) >> 2; 528c2ecf20Sopenharmony_ci} 538c2ecf20Sopenharmony_ci 548c2ecf20Sopenharmony_cistatic void si_dma_ring_set_wptr(struct amdgpu_ring *ring) 558c2ecf20Sopenharmony_ci{ 568c2ecf20Sopenharmony_ci struct amdgpu_device *adev = ring->adev; 578c2ecf20Sopenharmony_ci u32 me = (ring == &adev->sdma.instance[0].ring) ? 0 : 1; 588c2ecf20Sopenharmony_ci 598c2ecf20Sopenharmony_ci WREG32(DMA_RB_WPTR + sdma_offsets[me], 608c2ecf20Sopenharmony_ci (lower_32_bits(ring->wptr) << 2) & 0x3fffc); 618c2ecf20Sopenharmony_ci} 628c2ecf20Sopenharmony_ci 638c2ecf20Sopenharmony_cistatic void si_dma_ring_emit_ib(struct amdgpu_ring *ring, 648c2ecf20Sopenharmony_ci struct amdgpu_job *job, 658c2ecf20Sopenharmony_ci struct amdgpu_ib *ib, 668c2ecf20Sopenharmony_ci uint32_t flags) 678c2ecf20Sopenharmony_ci{ 688c2ecf20Sopenharmony_ci unsigned vmid = AMDGPU_JOB_GET_VMID(job); 698c2ecf20Sopenharmony_ci /* The indirect buffer packet must end on an 8 DW boundary in the DMA ring. 708c2ecf20Sopenharmony_ci * Pad as necessary with NOPs. 718c2ecf20Sopenharmony_ci */ 728c2ecf20Sopenharmony_ci while ((lower_32_bits(ring->wptr) & 7) != 5) 738c2ecf20Sopenharmony_ci amdgpu_ring_write(ring, DMA_PACKET(DMA_PACKET_NOP, 0, 0, 0, 0)); 748c2ecf20Sopenharmony_ci amdgpu_ring_write(ring, DMA_IB_PACKET(DMA_PACKET_INDIRECT_BUFFER, vmid, 0)); 758c2ecf20Sopenharmony_ci amdgpu_ring_write(ring, (ib->gpu_addr & 0xFFFFFFE0)); 768c2ecf20Sopenharmony_ci amdgpu_ring_write(ring, (ib->length_dw << 12) | (upper_32_bits(ib->gpu_addr) & 0xFF)); 778c2ecf20Sopenharmony_ci 788c2ecf20Sopenharmony_ci} 798c2ecf20Sopenharmony_ci 808c2ecf20Sopenharmony_ci/** 818c2ecf20Sopenharmony_ci * si_dma_ring_emit_fence - emit a fence on the DMA ring 828c2ecf20Sopenharmony_ci * 838c2ecf20Sopenharmony_ci * @ring: amdgpu ring pointer 848c2ecf20Sopenharmony_ci * @fence: amdgpu fence object 858c2ecf20Sopenharmony_ci * 868c2ecf20Sopenharmony_ci * Add a DMA fence packet to the ring to write 878c2ecf20Sopenharmony_ci * the fence seq number and DMA trap packet to generate 888c2ecf20Sopenharmony_ci * an interrupt if needed (VI). 898c2ecf20Sopenharmony_ci */ 908c2ecf20Sopenharmony_cistatic void si_dma_ring_emit_fence(struct amdgpu_ring *ring, u64 addr, u64 seq, 918c2ecf20Sopenharmony_ci unsigned flags) 928c2ecf20Sopenharmony_ci{ 938c2ecf20Sopenharmony_ci 948c2ecf20Sopenharmony_ci bool write64bit = flags & AMDGPU_FENCE_FLAG_64BIT; 958c2ecf20Sopenharmony_ci /* write the fence */ 968c2ecf20Sopenharmony_ci amdgpu_ring_write(ring, DMA_PACKET(DMA_PACKET_FENCE, 0, 0, 0, 0)); 978c2ecf20Sopenharmony_ci amdgpu_ring_write(ring, addr & 0xfffffffc); 988c2ecf20Sopenharmony_ci amdgpu_ring_write(ring, (upper_32_bits(addr) & 0xff)); 998c2ecf20Sopenharmony_ci amdgpu_ring_write(ring, seq); 1008c2ecf20Sopenharmony_ci /* optionally write high bits as well */ 1018c2ecf20Sopenharmony_ci if (write64bit) { 1028c2ecf20Sopenharmony_ci addr += 4; 1038c2ecf20Sopenharmony_ci amdgpu_ring_write(ring, DMA_PACKET(DMA_PACKET_FENCE, 0, 0, 0, 0)); 1048c2ecf20Sopenharmony_ci amdgpu_ring_write(ring, addr & 0xfffffffc); 1058c2ecf20Sopenharmony_ci amdgpu_ring_write(ring, (upper_32_bits(addr) & 0xff)); 1068c2ecf20Sopenharmony_ci amdgpu_ring_write(ring, upper_32_bits(seq)); 1078c2ecf20Sopenharmony_ci } 1088c2ecf20Sopenharmony_ci /* generate an interrupt */ 1098c2ecf20Sopenharmony_ci amdgpu_ring_write(ring, DMA_PACKET(DMA_PACKET_TRAP, 0, 0, 0, 0)); 1108c2ecf20Sopenharmony_ci} 1118c2ecf20Sopenharmony_ci 1128c2ecf20Sopenharmony_cistatic void si_dma_stop(struct amdgpu_device *adev) 1138c2ecf20Sopenharmony_ci{ 1148c2ecf20Sopenharmony_ci struct amdgpu_ring *ring; 1158c2ecf20Sopenharmony_ci u32 rb_cntl; 1168c2ecf20Sopenharmony_ci unsigned i; 1178c2ecf20Sopenharmony_ci 1188c2ecf20Sopenharmony_ci for (i = 0; i < adev->sdma.num_instances; i++) { 1198c2ecf20Sopenharmony_ci ring = &adev->sdma.instance[i].ring; 1208c2ecf20Sopenharmony_ci /* dma0 */ 1218c2ecf20Sopenharmony_ci rb_cntl = RREG32(DMA_RB_CNTL + sdma_offsets[i]); 1228c2ecf20Sopenharmony_ci rb_cntl &= ~DMA_RB_ENABLE; 1238c2ecf20Sopenharmony_ci WREG32(DMA_RB_CNTL + sdma_offsets[i], rb_cntl); 1248c2ecf20Sopenharmony_ci 1258c2ecf20Sopenharmony_ci if (adev->mman.buffer_funcs_ring == ring) 1268c2ecf20Sopenharmony_ci amdgpu_ttm_set_buffer_funcs_status(adev, false); 1278c2ecf20Sopenharmony_ci } 1288c2ecf20Sopenharmony_ci} 1298c2ecf20Sopenharmony_ci 1308c2ecf20Sopenharmony_cistatic int si_dma_start(struct amdgpu_device *adev) 1318c2ecf20Sopenharmony_ci{ 1328c2ecf20Sopenharmony_ci struct amdgpu_ring *ring; 1338c2ecf20Sopenharmony_ci u32 rb_cntl, dma_cntl, ib_cntl, rb_bufsz; 1348c2ecf20Sopenharmony_ci int i, r; 1358c2ecf20Sopenharmony_ci uint64_t rptr_addr; 1368c2ecf20Sopenharmony_ci 1378c2ecf20Sopenharmony_ci for (i = 0; i < adev->sdma.num_instances; i++) { 1388c2ecf20Sopenharmony_ci ring = &adev->sdma.instance[i].ring; 1398c2ecf20Sopenharmony_ci 1408c2ecf20Sopenharmony_ci WREG32(DMA_SEM_INCOMPLETE_TIMER_CNTL + sdma_offsets[i], 0); 1418c2ecf20Sopenharmony_ci WREG32(DMA_SEM_WAIT_FAIL_TIMER_CNTL + sdma_offsets[i], 0); 1428c2ecf20Sopenharmony_ci 1438c2ecf20Sopenharmony_ci /* Set ring buffer size in dwords */ 1448c2ecf20Sopenharmony_ci rb_bufsz = order_base_2(ring->ring_size / 4); 1458c2ecf20Sopenharmony_ci rb_cntl = rb_bufsz << 1; 1468c2ecf20Sopenharmony_ci#ifdef __BIG_ENDIAN 1478c2ecf20Sopenharmony_ci rb_cntl |= DMA_RB_SWAP_ENABLE | DMA_RPTR_WRITEBACK_SWAP_ENABLE; 1488c2ecf20Sopenharmony_ci#endif 1498c2ecf20Sopenharmony_ci WREG32(DMA_RB_CNTL + sdma_offsets[i], rb_cntl); 1508c2ecf20Sopenharmony_ci 1518c2ecf20Sopenharmony_ci /* Initialize the ring buffer's read and write pointers */ 1528c2ecf20Sopenharmony_ci WREG32(DMA_RB_RPTR + sdma_offsets[i], 0); 1538c2ecf20Sopenharmony_ci WREG32(DMA_RB_WPTR + sdma_offsets[i], 0); 1548c2ecf20Sopenharmony_ci 1558c2ecf20Sopenharmony_ci rptr_addr = adev->wb.gpu_addr + (ring->rptr_offs * 4); 1568c2ecf20Sopenharmony_ci 1578c2ecf20Sopenharmony_ci WREG32(DMA_RB_RPTR_ADDR_LO + sdma_offsets[i], lower_32_bits(rptr_addr)); 1588c2ecf20Sopenharmony_ci WREG32(DMA_RB_RPTR_ADDR_HI + sdma_offsets[i], upper_32_bits(rptr_addr) & 0xFF); 1598c2ecf20Sopenharmony_ci 1608c2ecf20Sopenharmony_ci rb_cntl |= DMA_RPTR_WRITEBACK_ENABLE; 1618c2ecf20Sopenharmony_ci 1628c2ecf20Sopenharmony_ci WREG32(DMA_RB_BASE + sdma_offsets[i], ring->gpu_addr >> 8); 1638c2ecf20Sopenharmony_ci 1648c2ecf20Sopenharmony_ci /* enable DMA IBs */ 1658c2ecf20Sopenharmony_ci ib_cntl = DMA_IB_ENABLE | CMD_VMID_FORCE; 1668c2ecf20Sopenharmony_ci#ifdef __BIG_ENDIAN 1678c2ecf20Sopenharmony_ci ib_cntl |= DMA_IB_SWAP_ENABLE; 1688c2ecf20Sopenharmony_ci#endif 1698c2ecf20Sopenharmony_ci WREG32(DMA_IB_CNTL + sdma_offsets[i], ib_cntl); 1708c2ecf20Sopenharmony_ci 1718c2ecf20Sopenharmony_ci dma_cntl = RREG32(DMA_CNTL + sdma_offsets[i]); 1728c2ecf20Sopenharmony_ci dma_cntl &= ~CTXEMPTY_INT_ENABLE; 1738c2ecf20Sopenharmony_ci WREG32(DMA_CNTL + sdma_offsets[i], dma_cntl); 1748c2ecf20Sopenharmony_ci 1758c2ecf20Sopenharmony_ci ring->wptr = 0; 1768c2ecf20Sopenharmony_ci WREG32(DMA_RB_WPTR + sdma_offsets[i], lower_32_bits(ring->wptr) << 2); 1778c2ecf20Sopenharmony_ci WREG32(DMA_RB_CNTL + sdma_offsets[i], rb_cntl | DMA_RB_ENABLE); 1788c2ecf20Sopenharmony_ci 1798c2ecf20Sopenharmony_ci ring->sched.ready = true; 1808c2ecf20Sopenharmony_ci 1818c2ecf20Sopenharmony_ci r = amdgpu_ring_test_helper(ring); 1828c2ecf20Sopenharmony_ci if (r) 1838c2ecf20Sopenharmony_ci return r; 1848c2ecf20Sopenharmony_ci 1858c2ecf20Sopenharmony_ci if (adev->mman.buffer_funcs_ring == ring) 1868c2ecf20Sopenharmony_ci amdgpu_ttm_set_buffer_funcs_status(adev, true); 1878c2ecf20Sopenharmony_ci } 1888c2ecf20Sopenharmony_ci 1898c2ecf20Sopenharmony_ci return 0; 1908c2ecf20Sopenharmony_ci} 1918c2ecf20Sopenharmony_ci 1928c2ecf20Sopenharmony_ci/** 1938c2ecf20Sopenharmony_ci * si_dma_ring_test_ring - simple async dma engine test 1948c2ecf20Sopenharmony_ci * 1958c2ecf20Sopenharmony_ci * @ring: amdgpu_ring structure holding ring information 1968c2ecf20Sopenharmony_ci * 1978c2ecf20Sopenharmony_ci * Test the DMA engine by writing using it to write an 1988c2ecf20Sopenharmony_ci * value to memory. (VI). 1998c2ecf20Sopenharmony_ci * Returns 0 for success, error for failure. 2008c2ecf20Sopenharmony_ci */ 2018c2ecf20Sopenharmony_cistatic int si_dma_ring_test_ring(struct amdgpu_ring *ring) 2028c2ecf20Sopenharmony_ci{ 2038c2ecf20Sopenharmony_ci struct amdgpu_device *adev = ring->adev; 2048c2ecf20Sopenharmony_ci unsigned i; 2058c2ecf20Sopenharmony_ci unsigned index; 2068c2ecf20Sopenharmony_ci int r; 2078c2ecf20Sopenharmony_ci u32 tmp; 2088c2ecf20Sopenharmony_ci u64 gpu_addr; 2098c2ecf20Sopenharmony_ci 2108c2ecf20Sopenharmony_ci r = amdgpu_device_wb_get(adev, &index); 2118c2ecf20Sopenharmony_ci if (r) 2128c2ecf20Sopenharmony_ci return r; 2138c2ecf20Sopenharmony_ci 2148c2ecf20Sopenharmony_ci gpu_addr = adev->wb.gpu_addr + (index * 4); 2158c2ecf20Sopenharmony_ci tmp = 0xCAFEDEAD; 2168c2ecf20Sopenharmony_ci adev->wb.wb[index] = cpu_to_le32(tmp); 2178c2ecf20Sopenharmony_ci 2188c2ecf20Sopenharmony_ci r = amdgpu_ring_alloc(ring, 4); 2198c2ecf20Sopenharmony_ci if (r) 2208c2ecf20Sopenharmony_ci goto error_free_wb; 2218c2ecf20Sopenharmony_ci 2228c2ecf20Sopenharmony_ci amdgpu_ring_write(ring, DMA_PACKET(DMA_PACKET_WRITE, 0, 0, 0, 1)); 2238c2ecf20Sopenharmony_ci amdgpu_ring_write(ring, lower_32_bits(gpu_addr)); 2248c2ecf20Sopenharmony_ci amdgpu_ring_write(ring, upper_32_bits(gpu_addr) & 0xff); 2258c2ecf20Sopenharmony_ci amdgpu_ring_write(ring, 0xDEADBEEF); 2268c2ecf20Sopenharmony_ci amdgpu_ring_commit(ring); 2278c2ecf20Sopenharmony_ci 2288c2ecf20Sopenharmony_ci for (i = 0; i < adev->usec_timeout; i++) { 2298c2ecf20Sopenharmony_ci tmp = le32_to_cpu(adev->wb.wb[index]); 2308c2ecf20Sopenharmony_ci if (tmp == 0xDEADBEEF) 2318c2ecf20Sopenharmony_ci break; 2328c2ecf20Sopenharmony_ci udelay(1); 2338c2ecf20Sopenharmony_ci } 2348c2ecf20Sopenharmony_ci 2358c2ecf20Sopenharmony_ci if (i >= adev->usec_timeout) 2368c2ecf20Sopenharmony_ci r = -ETIMEDOUT; 2378c2ecf20Sopenharmony_ci 2388c2ecf20Sopenharmony_cierror_free_wb: 2398c2ecf20Sopenharmony_ci amdgpu_device_wb_free(adev, index); 2408c2ecf20Sopenharmony_ci return r; 2418c2ecf20Sopenharmony_ci} 2428c2ecf20Sopenharmony_ci 2438c2ecf20Sopenharmony_ci/** 2448c2ecf20Sopenharmony_ci * si_dma_ring_test_ib - test an IB on the DMA engine 2458c2ecf20Sopenharmony_ci * 2468c2ecf20Sopenharmony_ci * @ring: amdgpu_ring structure holding ring information 2478c2ecf20Sopenharmony_ci * 2488c2ecf20Sopenharmony_ci * Test a simple IB in the DMA ring (VI). 2498c2ecf20Sopenharmony_ci * Returns 0 on success, error on failure. 2508c2ecf20Sopenharmony_ci */ 2518c2ecf20Sopenharmony_cistatic int si_dma_ring_test_ib(struct amdgpu_ring *ring, long timeout) 2528c2ecf20Sopenharmony_ci{ 2538c2ecf20Sopenharmony_ci struct amdgpu_device *adev = ring->adev; 2548c2ecf20Sopenharmony_ci struct amdgpu_ib ib; 2558c2ecf20Sopenharmony_ci struct dma_fence *f = NULL; 2568c2ecf20Sopenharmony_ci unsigned index; 2578c2ecf20Sopenharmony_ci u32 tmp = 0; 2588c2ecf20Sopenharmony_ci u64 gpu_addr; 2598c2ecf20Sopenharmony_ci long r; 2608c2ecf20Sopenharmony_ci 2618c2ecf20Sopenharmony_ci r = amdgpu_device_wb_get(adev, &index); 2628c2ecf20Sopenharmony_ci if (r) 2638c2ecf20Sopenharmony_ci return r; 2648c2ecf20Sopenharmony_ci 2658c2ecf20Sopenharmony_ci gpu_addr = adev->wb.gpu_addr + (index * 4); 2668c2ecf20Sopenharmony_ci tmp = 0xCAFEDEAD; 2678c2ecf20Sopenharmony_ci adev->wb.wb[index] = cpu_to_le32(tmp); 2688c2ecf20Sopenharmony_ci memset(&ib, 0, sizeof(ib)); 2698c2ecf20Sopenharmony_ci r = amdgpu_ib_get(adev, NULL, 256, 2708c2ecf20Sopenharmony_ci AMDGPU_IB_POOL_DIRECT, &ib); 2718c2ecf20Sopenharmony_ci if (r) 2728c2ecf20Sopenharmony_ci goto err0; 2738c2ecf20Sopenharmony_ci 2748c2ecf20Sopenharmony_ci ib.ptr[0] = DMA_PACKET(DMA_PACKET_WRITE, 0, 0, 0, 1); 2758c2ecf20Sopenharmony_ci ib.ptr[1] = lower_32_bits(gpu_addr); 2768c2ecf20Sopenharmony_ci ib.ptr[2] = upper_32_bits(gpu_addr) & 0xff; 2778c2ecf20Sopenharmony_ci ib.ptr[3] = 0xDEADBEEF; 2788c2ecf20Sopenharmony_ci ib.length_dw = 4; 2798c2ecf20Sopenharmony_ci r = amdgpu_ib_schedule(ring, 1, &ib, NULL, &f); 2808c2ecf20Sopenharmony_ci if (r) 2818c2ecf20Sopenharmony_ci goto err1; 2828c2ecf20Sopenharmony_ci 2838c2ecf20Sopenharmony_ci r = dma_fence_wait_timeout(f, false, timeout); 2848c2ecf20Sopenharmony_ci if (r == 0) { 2858c2ecf20Sopenharmony_ci r = -ETIMEDOUT; 2868c2ecf20Sopenharmony_ci goto err1; 2878c2ecf20Sopenharmony_ci } else if (r < 0) { 2888c2ecf20Sopenharmony_ci goto err1; 2898c2ecf20Sopenharmony_ci } 2908c2ecf20Sopenharmony_ci tmp = le32_to_cpu(adev->wb.wb[index]); 2918c2ecf20Sopenharmony_ci if (tmp == 0xDEADBEEF) 2928c2ecf20Sopenharmony_ci r = 0; 2938c2ecf20Sopenharmony_ci else 2948c2ecf20Sopenharmony_ci r = -EINVAL; 2958c2ecf20Sopenharmony_ci 2968c2ecf20Sopenharmony_cierr1: 2978c2ecf20Sopenharmony_ci amdgpu_ib_free(adev, &ib, NULL); 2988c2ecf20Sopenharmony_ci dma_fence_put(f); 2998c2ecf20Sopenharmony_cierr0: 3008c2ecf20Sopenharmony_ci amdgpu_device_wb_free(adev, index); 3018c2ecf20Sopenharmony_ci return r; 3028c2ecf20Sopenharmony_ci} 3038c2ecf20Sopenharmony_ci 3048c2ecf20Sopenharmony_ci/** 3058c2ecf20Sopenharmony_ci * cik_dma_vm_copy_pte - update PTEs by copying them from the GART 3068c2ecf20Sopenharmony_ci * 3078c2ecf20Sopenharmony_ci * @ib: indirect buffer to fill with commands 3088c2ecf20Sopenharmony_ci * @pe: addr of the page entry 3098c2ecf20Sopenharmony_ci * @src: src addr to copy from 3108c2ecf20Sopenharmony_ci * @count: number of page entries to update 3118c2ecf20Sopenharmony_ci * 3128c2ecf20Sopenharmony_ci * Update PTEs by copying them from the GART using DMA (SI). 3138c2ecf20Sopenharmony_ci */ 3148c2ecf20Sopenharmony_cistatic void si_dma_vm_copy_pte(struct amdgpu_ib *ib, 3158c2ecf20Sopenharmony_ci uint64_t pe, uint64_t src, 3168c2ecf20Sopenharmony_ci unsigned count) 3178c2ecf20Sopenharmony_ci{ 3188c2ecf20Sopenharmony_ci unsigned bytes = count * 8; 3198c2ecf20Sopenharmony_ci 3208c2ecf20Sopenharmony_ci ib->ptr[ib->length_dw++] = DMA_PACKET(DMA_PACKET_COPY, 3218c2ecf20Sopenharmony_ci 1, 0, 0, bytes); 3228c2ecf20Sopenharmony_ci ib->ptr[ib->length_dw++] = lower_32_bits(pe); 3238c2ecf20Sopenharmony_ci ib->ptr[ib->length_dw++] = lower_32_bits(src); 3248c2ecf20Sopenharmony_ci ib->ptr[ib->length_dw++] = upper_32_bits(pe) & 0xff; 3258c2ecf20Sopenharmony_ci ib->ptr[ib->length_dw++] = upper_32_bits(src) & 0xff; 3268c2ecf20Sopenharmony_ci} 3278c2ecf20Sopenharmony_ci 3288c2ecf20Sopenharmony_ci/** 3298c2ecf20Sopenharmony_ci * si_dma_vm_write_pte - update PTEs by writing them manually 3308c2ecf20Sopenharmony_ci * 3318c2ecf20Sopenharmony_ci * @ib: indirect buffer to fill with commands 3328c2ecf20Sopenharmony_ci * @pe: addr of the page entry 3338c2ecf20Sopenharmony_ci * @value: dst addr to write into pe 3348c2ecf20Sopenharmony_ci * @count: number of page entries to update 3358c2ecf20Sopenharmony_ci * @incr: increase next addr by incr bytes 3368c2ecf20Sopenharmony_ci * 3378c2ecf20Sopenharmony_ci * Update PTEs by writing them manually using DMA (SI). 3388c2ecf20Sopenharmony_ci */ 3398c2ecf20Sopenharmony_cistatic void si_dma_vm_write_pte(struct amdgpu_ib *ib, uint64_t pe, 3408c2ecf20Sopenharmony_ci uint64_t value, unsigned count, 3418c2ecf20Sopenharmony_ci uint32_t incr) 3428c2ecf20Sopenharmony_ci{ 3438c2ecf20Sopenharmony_ci unsigned ndw = count * 2; 3448c2ecf20Sopenharmony_ci 3458c2ecf20Sopenharmony_ci ib->ptr[ib->length_dw++] = DMA_PACKET(DMA_PACKET_WRITE, 0, 0, 0, ndw); 3468c2ecf20Sopenharmony_ci ib->ptr[ib->length_dw++] = lower_32_bits(pe); 3478c2ecf20Sopenharmony_ci ib->ptr[ib->length_dw++] = upper_32_bits(pe); 3488c2ecf20Sopenharmony_ci for (; ndw > 0; ndw -= 2) { 3498c2ecf20Sopenharmony_ci ib->ptr[ib->length_dw++] = lower_32_bits(value); 3508c2ecf20Sopenharmony_ci ib->ptr[ib->length_dw++] = upper_32_bits(value); 3518c2ecf20Sopenharmony_ci value += incr; 3528c2ecf20Sopenharmony_ci } 3538c2ecf20Sopenharmony_ci} 3548c2ecf20Sopenharmony_ci 3558c2ecf20Sopenharmony_ci/** 3568c2ecf20Sopenharmony_ci * si_dma_vm_set_pte_pde - update the page tables using sDMA 3578c2ecf20Sopenharmony_ci * 3588c2ecf20Sopenharmony_ci * @ib: indirect buffer to fill with commands 3598c2ecf20Sopenharmony_ci * @pe: addr of the page entry 3608c2ecf20Sopenharmony_ci * @addr: dst addr to write into pe 3618c2ecf20Sopenharmony_ci * @count: number of page entries to update 3628c2ecf20Sopenharmony_ci * @incr: increase next addr by incr bytes 3638c2ecf20Sopenharmony_ci * @flags: access flags 3648c2ecf20Sopenharmony_ci * 3658c2ecf20Sopenharmony_ci * Update the page tables using sDMA (CIK). 3668c2ecf20Sopenharmony_ci */ 3678c2ecf20Sopenharmony_cistatic void si_dma_vm_set_pte_pde(struct amdgpu_ib *ib, 3688c2ecf20Sopenharmony_ci uint64_t pe, 3698c2ecf20Sopenharmony_ci uint64_t addr, unsigned count, 3708c2ecf20Sopenharmony_ci uint32_t incr, uint64_t flags) 3718c2ecf20Sopenharmony_ci{ 3728c2ecf20Sopenharmony_ci uint64_t value; 3738c2ecf20Sopenharmony_ci unsigned ndw; 3748c2ecf20Sopenharmony_ci 3758c2ecf20Sopenharmony_ci while (count) { 3768c2ecf20Sopenharmony_ci ndw = count * 2; 3778c2ecf20Sopenharmony_ci if (ndw > 0xFFFFE) 3788c2ecf20Sopenharmony_ci ndw = 0xFFFFE; 3798c2ecf20Sopenharmony_ci 3808c2ecf20Sopenharmony_ci if (flags & AMDGPU_PTE_VALID) 3818c2ecf20Sopenharmony_ci value = addr; 3828c2ecf20Sopenharmony_ci else 3838c2ecf20Sopenharmony_ci value = 0; 3848c2ecf20Sopenharmony_ci 3858c2ecf20Sopenharmony_ci /* for physically contiguous pages (vram) */ 3868c2ecf20Sopenharmony_ci ib->ptr[ib->length_dw++] = DMA_PTE_PDE_PACKET(ndw); 3878c2ecf20Sopenharmony_ci ib->ptr[ib->length_dw++] = pe; /* dst addr */ 3888c2ecf20Sopenharmony_ci ib->ptr[ib->length_dw++] = upper_32_bits(pe) & 0xff; 3898c2ecf20Sopenharmony_ci ib->ptr[ib->length_dw++] = lower_32_bits(flags); /* mask */ 3908c2ecf20Sopenharmony_ci ib->ptr[ib->length_dw++] = upper_32_bits(flags); 3918c2ecf20Sopenharmony_ci ib->ptr[ib->length_dw++] = value; /* value */ 3928c2ecf20Sopenharmony_ci ib->ptr[ib->length_dw++] = upper_32_bits(value); 3938c2ecf20Sopenharmony_ci ib->ptr[ib->length_dw++] = incr; /* increment size */ 3948c2ecf20Sopenharmony_ci ib->ptr[ib->length_dw++] = 0; 3958c2ecf20Sopenharmony_ci pe += ndw * 4; 3968c2ecf20Sopenharmony_ci addr += (ndw / 2) * incr; 3978c2ecf20Sopenharmony_ci count -= ndw / 2; 3988c2ecf20Sopenharmony_ci } 3998c2ecf20Sopenharmony_ci} 4008c2ecf20Sopenharmony_ci 4018c2ecf20Sopenharmony_ci/** 4028c2ecf20Sopenharmony_ci * si_dma_pad_ib - pad the IB to the required number of dw 4038c2ecf20Sopenharmony_ci * 4048c2ecf20Sopenharmony_ci * @ib: indirect buffer to fill with padding 4058c2ecf20Sopenharmony_ci * 4068c2ecf20Sopenharmony_ci */ 4078c2ecf20Sopenharmony_cistatic void si_dma_ring_pad_ib(struct amdgpu_ring *ring, struct amdgpu_ib *ib) 4088c2ecf20Sopenharmony_ci{ 4098c2ecf20Sopenharmony_ci while (ib->length_dw & 0x7) 4108c2ecf20Sopenharmony_ci ib->ptr[ib->length_dw++] = DMA_PACKET(DMA_PACKET_NOP, 0, 0, 0, 0); 4118c2ecf20Sopenharmony_ci} 4128c2ecf20Sopenharmony_ci 4138c2ecf20Sopenharmony_ci/** 4148c2ecf20Sopenharmony_ci * cik_sdma_ring_emit_pipeline_sync - sync the pipeline 4158c2ecf20Sopenharmony_ci * 4168c2ecf20Sopenharmony_ci * @ring: amdgpu_ring pointer 4178c2ecf20Sopenharmony_ci * 4188c2ecf20Sopenharmony_ci * Make sure all previous operations are completed (CIK). 4198c2ecf20Sopenharmony_ci */ 4208c2ecf20Sopenharmony_cistatic void si_dma_ring_emit_pipeline_sync(struct amdgpu_ring *ring) 4218c2ecf20Sopenharmony_ci{ 4228c2ecf20Sopenharmony_ci uint32_t seq = ring->fence_drv.sync_seq; 4238c2ecf20Sopenharmony_ci uint64_t addr = ring->fence_drv.gpu_addr; 4248c2ecf20Sopenharmony_ci 4258c2ecf20Sopenharmony_ci /* wait for idle */ 4268c2ecf20Sopenharmony_ci amdgpu_ring_write(ring, DMA_PACKET(DMA_PACKET_POLL_REG_MEM, 0, 0, 0, 0) | 4278c2ecf20Sopenharmony_ci (1 << 27)); /* Poll memory */ 4288c2ecf20Sopenharmony_ci amdgpu_ring_write(ring, lower_32_bits(addr)); 4298c2ecf20Sopenharmony_ci amdgpu_ring_write(ring, (0xff << 16) | upper_32_bits(addr)); /* retry, addr_hi */ 4308c2ecf20Sopenharmony_ci amdgpu_ring_write(ring, 0xffffffff); /* mask */ 4318c2ecf20Sopenharmony_ci amdgpu_ring_write(ring, seq); /* value */ 4328c2ecf20Sopenharmony_ci amdgpu_ring_write(ring, (3 << 28) | 0x20); /* func(equal) | poll interval */ 4338c2ecf20Sopenharmony_ci} 4348c2ecf20Sopenharmony_ci 4358c2ecf20Sopenharmony_ci/** 4368c2ecf20Sopenharmony_ci * si_dma_ring_emit_vm_flush - cik vm flush using sDMA 4378c2ecf20Sopenharmony_ci * 4388c2ecf20Sopenharmony_ci * @ring: amdgpu_ring pointer 4398c2ecf20Sopenharmony_ci * @vm: amdgpu_vm pointer 4408c2ecf20Sopenharmony_ci * 4418c2ecf20Sopenharmony_ci * Update the page table base and flush the VM TLB 4428c2ecf20Sopenharmony_ci * using sDMA (VI). 4438c2ecf20Sopenharmony_ci */ 4448c2ecf20Sopenharmony_cistatic void si_dma_ring_emit_vm_flush(struct amdgpu_ring *ring, 4458c2ecf20Sopenharmony_ci unsigned vmid, uint64_t pd_addr) 4468c2ecf20Sopenharmony_ci{ 4478c2ecf20Sopenharmony_ci amdgpu_gmc_emit_flush_gpu_tlb(ring, vmid, pd_addr); 4488c2ecf20Sopenharmony_ci 4498c2ecf20Sopenharmony_ci /* wait for invalidate to complete */ 4508c2ecf20Sopenharmony_ci amdgpu_ring_write(ring, DMA_PACKET(DMA_PACKET_POLL_REG_MEM, 0, 0, 0, 0)); 4518c2ecf20Sopenharmony_ci amdgpu_ring_write(ring, VM_INVALIDATE_REQUEST); 4528c2ecf20Sopenharmony_ci amdgpu_ring_write(ring, 0xff << 16); /* retry */ 4538c2ecf20Sopenharmony_ci amdgpu_ring_write(ring, 1 << vmid); /* mask */ 4548c2ecf20Sopenharmony_ci amdgpu_ring_write(ring, 0); /* value */ 4558c2ecf20Sopenharmony_ci amdgpu_ring_write(ring, (0 << 28) | 0x20); /* func(always) | poll interval */ 4568c2ecf20Sopenharmony_ci} 4578c2ecf20Sopenharmony_ci 4588c2ecf20Sopenharmony_cistatic void si_dma_ring_emit_wreg(struct amdgpu_ring *ring, 4598c2ecf20Sopenharmony_ci uint32_t reg, uint32_t val) 4608c2ecf20Sopenharmony_ci{ 4618c2ecf20Sopenharmony_ci amdgpu_ring_write(ring, DMA_PACKET(DMA_PACKET_SRBM_WRITE, 0, 0, 0, 0)); 4628c2ecf20Sopenharmony_ci amdgpu_ring_write(ring, (0xf << 16) | reg); 4638c2ecf20Sopenharmony_ci amdgpu_ring_write(ring, val); 4648c2ecf20Sopenharmony_ci} 4658c2ecf20Sopenharmony_ci 4668c2ecf20Sopenharmony_cistatic int si_dma_early_init(void *handle) 4678c2ecf20Sopenharmony_ci{ 4688c2ecf20Sopenharmony_ci struct amdgpu_device *adev = (struct amdgpu_device *)handle; 4698c2ecf20Sopenharmony_ci 4708c2ecf20Sopenharmony_ci adev->sdma.num_instances = 2; 4718c2ecf20Sopenharmony_ci 4728c2ecf20Sopenharmony_ci si_dma_set_ring_funcs(adev); 4738c2ecf20Sopenharmony_ci si_dma_set_buffer_funcs(adev); 4748c2ecf20Sopenharmony_ci si_dma_set_vm_pte_funcs(adev); 4758c2ecf20Sopenharmony_ci si_dma_set_irq_funcs(adev); 4768c2ecf20Sopenharmony_ci 4778c2ecf20Sopenharmony_ci return 0; 4788c2ecf20Sopenharmony_ci} 4798c2ecf20Sopenharmony_ci 4808c2ecf20Sopenharmony_cistatic int si_dma_sw_init(void *handle) 4818c2ecf20Sopenharmony_ci{ 4828c2ecf20Sopenharmony_ci struct amdgpu_ring *ring; 4838c2ecf20Sopenharmony_ci int r, i; 4848c2ecf20Sopenharmony_ci struct amdgpu_device *adev = (struct amdgpu_device *)handle; 4858c2ecf20Sopenharmony_ci 4868c2ecf20Sopenharmony_ci /* DMA0 trap event */ 4878c2ecf20Sopenharmony_ci r = amdgpu_irq_add_id(adev, AMDGPU_IRQ_CLIENTID_LEGACY, 224, 4888c2ecf20Sopenharmony_ci &adev->sdma.trap_irq); 4898c2ecf20Sopenharmony_ci if (r) 4908c2ecf20Sopenharmony_ci return r; 4918c2ecf20Sopenharmony_ci 4928c2ecf20Sopenharmony_ci /* DMA1 trap event */ 4938c2ecf20Sopenharmony_ci r = amdgpu_irq_add_id(adev, AMDGPU_IRQ_CLIENTID_LEGACY, 244, 4948c2ecf20Sopenharmony_ci &adev->sdma.trap_irq); 4958c2ecf20Sopenharmony_ci if (r) 4968c2ecf20Sopenharmony_ci return r; 4978c2ecf20Sopenharmony_ci 4988c2ecf20Sopenharmony_ci for (i = 0; i < adev->sdma.num_instances; i++) { 4998c2ecf20Sopenharmony_ci ring = &adev->sdma.instance[i].ring; 5008c2ecf20Sopenharmony_ci ring->ring_obj = NULL; 5018c2ecf20Sopenharmony_ci ring->use_doorbell = false; 5028c2ecf20Sopenharmony_ci sprintf(ring->name, "sdma%d", i); 5038c2ecf20Sopenharmony_ci r = amdgpu_ring_init(adev, ring, 1024, 5048c2ecf20Sopenharmony_ci &adev->sdma.trap_irq, 5058c2ecf20Sopenharmony_ci (i == 0) ? 5068c2ecf20Sopenharmony_ci AMDGPU_SDMA_IRQ_INSTANCE0 : 5078c2ecf20Sopenharmony_ci AMDGPU_SDMA_IRQ_INSTANCE1, 5088c2ecf20Sopenharmony_ci AMDGPU_RING_PRIO_DEFAULT); 5098c2ecf20Sopenharmony_ci if (r) 5108c2ecf20Sopenharmony_ci return r; 5118c2ecf20Sopenharmony_ci } 5128c2ecf20Sopenharmony_ci 5138c2ecf20Sopenharmony_ci return r; 5148c2ecf20Sopenharmony_ci} 5158c2ecf20Sopenharmony_ci 5168c2ecf20Sopenharmony_cistatic int si_dma_sw_fini(void *handle) 5178c2ecf20Sopenharmony_ci{ 5188c2ecf20Sopenharmony_ci struct amdgpu_device *adev = (struct amdgpu_device *)handle; 5198c2ecf20Sopenharmony_ci int i; 5208c2ecf20Sopenharmony_ci 5218c2ecf20Sopenharmony_ci for (i = 0; i < adev->sdma.num_instances; i++) 5228c2ecf20Sopenharmony_ci amdgpu_ring_fini(&adev->sdma.instance[i].ring); 5238c2ecf20Sopenharmony_ci 5248c2ecf20Sopenharmony_ci return 0; 5258c2ecf20Sopenharmony_ci} 5268c2ecf20Sopenharmony_ci 5278c2ecf20Sopenharmony_cistatic int si_dma_hw_init(void *handle) 5288c2ecf20Sopenharmony_ci{ 5298c2ecf20Sopenharmony_ci struct amdgpu_device *adev = (struct amdgpu_device *)handle; 5308c2ecf20Sopenharmony_ci 5318c2ecf20Sopenharmony_ci return si_dma_start(adev); 5328c2ecf20Sopenharmony_ci} 5338c2ecf20Sopenharmony_ci 5348c2ecf20Sopenharmony_cistatic int si_dma_hw_fini(void *handle) 5358c2ecf20Sopenharmony_ci{ 5368c2ecf20Sopenharmony_ci struct amdgpu_device *adev = (struct amdgpu_device *)handle; 5378c2ecf20Sopenharmony_ci 5388c2ecf20Sopenharmony_ci si_dma_stop(adev); 5398c2ecf20Sopenharmony_ci 5408c2ecf20Sopenharmony_ci return 0; 5418c2ecf20Sopenharmony_ci} 5428c2ecf20Sopenharmony_ci 5438c2ecf20Sopenharmony_cistatic int si_dma_suspend(void *handle) 5448c2ecf20Sopenharmony_ci{ 5458c2ecf20Sopenharmony_ci struct amdgpu_device *adev = (struct amdgpu_device *)handle; 5468c2ecf20Sopenharmony_ci 5478c2ecf20Sopenharmony_ci return si_dma_hw_fini(adev); 5488c2ecf20Sopenharmony_ci} 5498c2ecf20Sopenharmony_ci 5508c2ecf20Sopenharmony_cistatic int si_dma_resume(void *handle) 5518c2ecf20Sopenharmony_ci{ 5528c2ecf20Sopenharmony_ci struct amdgpu_device *adev = (struct amdgpu_device *)handle; 5538c2ecf20Sopenharmony_ci 5548c2ecf20Sopenharmony_ci return si_dma_hw_init(adev); 5558c2ecf20Sopenharmony_ci} 5568c2ecf20Sopenharmony_ci 5578c2ecf20Sopenharmony_cistatic bool si_dma_is_idle(void *handle) 5588c2ecf20Sopenharmony_ci{ 5598c2ecf20Sopenharmony_ci struct amdgpu_device *adev = (struct amdgpu_device *)handle; 5608c2ecf20Sopenharmony_ci u32 tmp = RREG32(SRBM_STATUS2); 5618c2ecf20Sopenharmony_ci 5628c2ecf20Sopenharmony_ci if (tmp & (DMA_BUSY_MASK | DMA1_BUSY_MASK)) 5638c2ecf20Sopenharmony_ci return false; 5648c2ecf20Sopenharmony_ci 5658c2ecf20Sopenharmony_ci return true; 5668c2ecf20Sopenharmony_ci} 5678c2ecf20Sopenharmony_ci 5688c2ecf20Sopenharmony_cistatic int si_dma_wait_for_idle(void *handle) 5698c2ecf20Sopenharmony_ci{ 5708c2ecf20Sopenharmony_ci unsigned i; 5718c2ecf20Sopenharmony_ci struct amdgpu_device *adev = (struct amdgpu_device *)handle; 5728c2ecf20Sopenharmony_ci 5738c2ecf20Sopenharmony_ci for (i = 0; i < adev->usec_timeout; i++) { 5748c2ecf20Sopenharmony_ci if (si_dma_is_idle(handle)) 5758c2ecf20Sopenharmony_ci return 0; 5768c2ecf20Sopenharmony_ci udelay(1); 5778c2ecf20Sopenharmony_ci } 5788c2ecf20Sopenharmony_ci return -ETIMEDOUT; 5798c2ecf20Sopenharmony_ci} 5808c2ecf20Sopenharmony_ci 5818c2ecf20Sopenharmony_cistatic int si_dma_soft_reset(void *handle) 5828c2ecf20Sopenharmony_ci{ 5838c2ecf20Sopenharmony_ci DRM_INFO("si_dma_soft_reset --- not implemented !!!!!!!\n"); 5848c2ecf20Sopenharmony_ci return 0; 5858c2ecf20Sopenharmony_ci} 5868c2ecf20Sopenharmony_ci 5878c2ecf20Sopenharmony_cistatic int si_dma_set_trap_irq_state(struct amdgpu_device *adev, 5888c2ecf20Sopenharmony_ci struct amdgpu_irq_src *src, 5898c2ecf20Sopenharmony_ci unsigned type, 5908c2ecf20Sopenharmony_ci enum amdgpu_interrupt_state state) 5918c2ecf20Sopenharmony_ci{ 5928c2ecf20Sopenharmony_ci u32 sdma_cntl; 5938c2ecf20Sopenharmony_ci 5948c2ecf20Sopenharmony_ci switch (type) { 5958c2ecf20Sopenharmony_ci case AMDGPU_SDMA_IRQ_INSTANCE0: 5968c2ecf20Sopenharmony_ci switch (state) { 5978c2ecf20Sopenharmony_ci case AMDGPU_IRQ_STATE_DISABLE: 5988c2ecf20Sopenharmony_ci sdma_cntl = RREG32(DMA_CNTL + DMA0_REGISTER_OFFSET); 5998c2ecf20Sopenharmony_ci sdma_cntl &= ~TRAP_ENABLE; 6008c2ecf20Sopenharmony_ci WREG32(DMA_CNTL + DMA0_REGISTER_OFFSET, sdma_cntl); 6018c2ecf20Sopenharmony_ci break; 6028c2ecf20Sopenharmony_ci case AMDGPU_IRQ_STATE_ENABLE: 6038c2ecf20Sopenharmony_ci sdma_cntl = RREG32(DMA_CNTL + DMA0_REGISTER_OFFSET); 6048c2ecf20Sopenharmony_ci sdma_cntl |= TRAP_ENABLE; 6058c2ecf20Sopenharmony_ci WREG32(DMA_CNTL + DMA0_REGISTER_OFFSET, sdma_cntl); 6068c2ecf20Sopenharmony_ci break; 6078c2ecf20Sopenharmony_ci default: 6088c2ecf20Sopenharmony_ci break; 6098c2ecf20Sopenharmony_ci } 6108c2ecf20Sopenharmony_ci break; 6118c2ecf20Sopenharmony_ci case AMDGPU_SDMA_IRQ_INSTANCE1: 6128c2ecf20Sopenharmony_ci switch (state) { 6138c2ecf20Sopenharmony_ci case AMDGPU_IRQ_STATE_DISABLE: 6148c2ecf20Sopenharmony_ci sdma_cntl = RREG32(DMA_CNTL + DMA1_REGISTER_OFFSET); 6158c2ecf20Sopenharmony_ci sdma_cntl &= ~TRAP_ENABLE; 6168c2ecf20Sopenharmony_ci WREG32(DMA_CNTL + DMA1_REGISTER_OFFSET, sdma_cntl); 6178c2ecf20Sopenharmony_ci break; 6188c2ecf20Sopenharmony_ci case AMDGPU_IRQ_STATE_ENABLE: 6198c2ecf20Sopenharmony_ci sdma_cntl = RREG32(DMA_CNTL + DMA1_REGISTER_OFFSET); 6208c2ecf20Sopenharmony_ci sdma_cntl |= TRAP_ENABLE; 6218c2ecf20Sopenharmony_ci WREG32(DMA_CNTL + DMA1_REGISTER_OFFSET, sdma_cntl); 6228c2ecf20Sopenharmony_ci break; 6238c2ecf20Sopenharmony_ci default: 6248c2ecf20Sopenharmony_ci break; 6258c2ecf20Sopenharmony_ci } 6268c2ecf20Sopenharmony_ci break; 6278c2ecf20Sopenharmony_ci default: 6288c2ecf20Sopenharmony_ci break; 6298c2ecf20Sopenharmony_ci } 6308c2ecf20Sopenharmony_ci return 0; 6318c2ecf20Sopenharmony_ci} 6328c2ecf20Sopenharmony_ci 6338c2ecf20Sopenharmony_cistatic int si_dma_process_trap_irq(struct amdgpu_device *adev, 6348c2ecf20Sopenharmony_ci struct amdgpu_irq_src *source, 6358c2ecf20Sopenharmony_ci struct amdgpu_iv_entry *entry) 6368c2ecf20Sopenharmony_ci{ 6378c2ecf20Sopenharmony_ci if (entry->src_id == 224) 6388c2ecf20Sopenharmony_ci amdgpu_fence_process(&adev->sdma.instance[0].ring); 6398c2ecf20Sopenharmony_ci else 6408c2ecf20Sopenharmony_ci amdgpu_fence_process(&adev->sdma.instance[1].ring); 6418c2ecf20Sopenharmony_ci return 0; 6428c2ecf20Sopenharmony_ci} 6438c2ecf20Sopenharmony_ci 6448c2ecf20Sopenharmony_cistatic int si_dma_set_clockgating_state(void *handle, 6458c2ecf20Sopenharmony_ci enum amd_clockgating_state state) 6468c2ecf20Sopenharmony_ci{ 6478c2ecf20Sopenharmony_ci u32 orig, data, offset; 6488c2ecf20Sopenharmony_ci int i; 6498c2ecf20Sopenharmony_ci bool enable; 6508c2ecf20Sopenharmony_ci struct amdgpu_device *adev = (struct amdgpu_device *)handle; 6518c2ecf20Sopenharmony_ci 6528c2ecf20Sopenharmony_ci enable = (state == AMD_CG_STATE_GATE); 6538c2ecf20Sopenharmony_ci 6548c2ecf20Sopenharmony_ci if (enable && (adev->cg_flags & AMD_CG_SUPPORT_SDMA_MGCG)) { 6558c2ecf20Sopenharmony_ci for (i = 0; i < adev->sdma.num_instances; i++) { 6568c2ecf20Sopenharmony_ci if (i == 0) 6578c2ecf20Sopenharmony_ci offset = DMA0_REGISTER_OFFSET; 6588c2ecf20Sopenharmony_ci else 6598c2ecf20Sopenharmony_ci offset = DMA1_REGISTER_OFFSET; 6608c2ecf20Sopenharmony_ci orig = data = RREG32(DMA_POWER_CNTL + offset); 6618c2ecf20Sopenharmony_ci data &= ~MEM_POWER_OVERRIDE; 6628c2ecf20Sopenharmony_ci if (data != orig) 6638c2ecf20Sopenharmony_ci WREG32(DMA_POWER_CNTL + offset, data); 6648c2ecf20Sopenharmony_ci WREG32(DMA_CLK_CTRL + offset, 0x00000100); 6658c2ecf20Sopenharmony_ci } 6668c2ecf20Sopenharmony_ci } else { 6678c2ecf20Sopenharmony_ci for (i = 0; i < adev->sdma.num_instances; i++) { 6688c2ecf20Sopenharmony_ci if (i == 0) 6698c2ecf20Sopenharmony_ci offset = DMA0_REGISTER_OFFSET; 6708c2ecf20Sopenharmony_ci else 6718c2ecf20Sopenharmony_ci offset = DMA1_REGISTER_OFFSET; 6728c2ecf20Sopenharmony_ci orig = data = RREG32(DMA_POWER_CNTL + offset); 6738c2ecf20Sopenharmony_ci data |= MEM_POWER_OVERRIDE; 6748c2ecf20Sopenharmony_ci if (data != orig) 6758c2ecf20Sopenharmony_ci WREG32(DMA_POWER_CNTL + offset, data); 6768c2ecf20Sopenharmony_ci 6778c2ecf20Sopenharmony_ci orig = data = RREG32(DMA_CLK_CTRL + offset); 6788c2ecf20Sopenharmony_ci data = 0xff000000; 6798c2ecf20Sopenharmony_ci if (data != orig) 6808c2ecf20Sopenharmony_ci WREG32(DMA_CLK_CTRL + offset, data); 6818c2ecf20Sopenharmony_ci } 6828c2ecf20Sopenharmony_ci } 6838c2ecf20Sopenharmony_ci 6848c2ecf20Sopenharmony_ci return 0; 6858c2ecf20Sopenharmony_ci} 6868c2ecf20Sopenharmony_ci 6878c2ecf20Sopenharmony_cistatic int si_dma_set_powergating_state(void *handle, 6888c2ecf20Sopenharmony_ci enum amd_powergating_state state) 6898c2ecf20Sopenharmony_ci{ 6908c2ecf20Sopenharmony_ci u32 tmp; 6918c2ecf20Sopenharmony_ci 6928c2ecf20Sopenharmony_ci struct amdgpu_device *adev = (struct amdgpu_device *)handle; 6938c2ecf20Sopenharmony_ci 6948c2ecf20Sopenharmony_ci WREG32(DMA_PGFSM_WRITE, 0x00002000); 6958c2ecf20Sopenharmony_ci WREG32(DMA_PGFSM_CONFIG, 0x100010ff); 6968c2ecf20Sopenharmony_ci 6978c2ecf20Sopenharmony_ci for (tmp = 0; tmp < 5; tmp++) 6988c2ecf20Sopenharmony_ci WREG32(DMA_PGFSM_WRITE, 0); 6998c2ecf20Sopenharmony_ci 7008c2ecf20Sopenharmony_ci return 0; 7018c2ecf20Sopenharmony_ci} 7028c2ecf20Sopenharmony_ci 7038c2ecf20Sopenharmony_cistatic const struct amd_ip_funcs si_dma_ip_funcs = { 7048c2ecf20Sopenharmony_ci .name = "si_dma", 7058c2ecf20Sopenharmony_ci .early_init = si_dma_early_init, 7068c2ecf20Sopenharmony_ci .late_init = NULL, 7078c2ecf20Sopenharmony_ci .sw_init = si_dma_sw_init, 7088c2ecf20Sopenharmony_ci .sw_fini = si_dma_sw_fini, 7098c2ecf20Sopenharmony_ci .hw_init = si_dma_hw_init, 7108c2ecf20Sopenharmony_ci .hw_fini = si_dma_hw_fini, 7118c2ecf20Sopenharmony_ci .suspend = si_dma_suspend, 7128c2ecf20Sopenharmony_ci .resume = si_dma_resume, 7138c2ecf20Sopenharmony_ci .is_idle = si_dma_is_idle, 7148c2ecf20Sopenharmony_ci .wait_for_idle = si_dma_wait_for_idle, 7158c2ecf20Sopenharmony_ci .soft_reset = si_dma_soft_reset, 7168c2ecf20Sopenharmony_ci .set_clockgating_state = si_dma_set_clockgating_state, 7178c2ecf20Sopenharmony_ci .set_powergating_state = si_dma_set_powergating_state, 7188c2ecf20Sopenharmony_ci}; 7198c2ecf20Sopenharmony_ci 7208c2ecf20Sopenharmony_cistatic const struct amdgpu_ring_funcs si_dma_ring_funcs = { 7218c2ecf20Sopenharmony_ci .type = AMDGPU_RING_TYPE_SDMA, 7228c2ecf20Sopenharmony_ci .align_mask = 0xf, 7238c2ecf20Sopenharmony_ci .nop = DMA_PACKET(DMA_PACKET_NOP, 0, 0, 0, 0), 7248c2ecf20Sopenharmony_ci .support_64bit_ptrs = false, 7258c2ecf20Sopenharmony_ci .get_rptr = si_dma_ring_get_rptr, 7268c2ecf20Sopenharmony_ci .get_wptr = si_dma_ring_get_wptr, 7278c2ecf20Sopenharmony_ci .set_wptr = si_dma_ring_set_wptr, 7288c2ecf20Sopenharmony_ci .emit_frame_size = 7298c2ecf20Sopenharmony_ci 3 + 3 + /* hdp flush / invalidate */ 7308c2ecf20Sopenharmony_ci 6 + /* si_dma_ring_emit_pipeline_sync */ 7318c2ecf20Sopenharmony_ci SI_FLUSH_GPU_TLB_NUM_WREG * 3 + 6 + /* si_dma_ring_emit_vm_flush */ 7328c2ecf20Sopenharmony_ci 9 + 9 + 9, /* si_dma_ring_emit_fence x3 for user fence, vm fence */ 7338c2ecf20Sopenharmony_ci .emit_ib_size = 7 + 3, /* si_dma_ring_emit_ib */ 7348c2ecf20Sopenharmony_ci .emit_ib = si_dma_ring_emit_ib, 7358c2ecf20Sopenharmony_ci .emit_fence = si_dma_ring_emit_fence, 7368c2ecf20Sopenharmony_ci .emit_pipeline_sync = si_dma_ring_emit_pipeline_sync, 7378c2ecf20Sopenharmony_ci .emit_vm_flush = si_dma_ring_emit_vm_flush, 7388c2ecf20Sopenharmony_ci .test_ring = si_dma_ring_test_ring, 7398c2ecf20Sopenharmony_ci .test_ib = si_dma_ring_test_ib, 7408c2ecf20Sopenharmony_ci .insert_nop = amdgpu_ring_insert_nop, 7418c2ecf20Sopenharmony_ci .pad_ib = si_dma_ring_pad_ib, 7428c2ecf20Sopenharmony_ci .emit_wreg = si_dma_ring_emit_wreg, 7438c2ecf20Sopenharmony_ci}; 7448c2ecf20Sopenharmony_ci 7458c2ecf20Sopenharmony_cistatic void si_dma_set_ring_funcs(struct amdgpu_device *adev) 7468c2ecf20Sopenharmony_ci{ 7478c2ecf20Sopenharmony_ci int i; 7488c2ecf20Sopenharmony_ci 7498c2ecf20Sopenharmony_ci for (i = 0; i < adev->sdma.num_instances; i++) 7508c2ecf20Sopenharmony_ci adev->sdma.instance[i].ring.funcs = &si_dma_ring_funcs; 7518c2ecf20Sopenharmony_ci} 7528c2ecf20Sopenharmony_ci 7538c2ecf20Sopenharmony_cistatic const struct amdgpu_irq_src_funcs si_dma_trap_irq_funcs = { 7548c2ecf20Sopenharmony_ci .set = si_dma_set_trap_irq_state, 7558c2ecf20Sopenharmony_ci .process = si_dma_process_trap_irq, 7568c2ecf20Sopenharmony_ci}; 7578c2ecf20Sopenharmony_ci 7588c2ecf20Sopenharmony_cistatic void si_dma_set_irq_funcs(struct amdgpu_device *adev) 7598c2ecf20Sopenharmony_ci{ 7608c2ecf20Sopenharmony_ci adev->sdma.trap_irq.num_types = AMDGPU_SDMA_IRQ_LAST; 7618c2ecf20Sopenharmony_ci adev->sdma.trap_irq.funcs = &si_dma_trap_irq_funcs; 7628c2ecf20Sopenharmony_ci} 7638c2ecf20Sopenharmony_ci 7648c2ecf20Sopenharmony_ci/** 7658c2ecf20Sopenharmony_ci * si_dma_emit_copy_buffer - copy buffer using the sDMA engine 7668c2ecf20Sopenharmony_ci * 7678c2ecf20Sopenharmony_ci * @ring: amdgpu_ring structure holding ring information 7688c2ecf20Sopenharmony_ci * @src_offset: src GPU address 7698c2ecf20Sopenharmony_ci * @dst_offset: dst GPU address 7708c2ecf20Sopenharmony_ci * @byte_count: number of bytes to xfer 7718c2ecf20Sopenharmony_ci * 7728c2ecf20Sopenharmony_ci * Copy GPU buffers using the DMA engine (VI). 7738c2ecf20Sopenharmony_ci * Used by the amdgpu ttm implementation to move pages if 7748c2ecf20Sopenharmony_ci * registered as the asic copy callback. 7758c2ecf20Sopenharmony_ci */ 7768c2ecf20Sopenharmony_cistatic void si_dma_emit_copy_buffer(struct amdgpu_ib *ib, 7778c2ecf20Sopenharmony_ci uint64_t src_offset, 7788c2ecf20Sopenharmony_ci uint64_t dst_offset, 7798c2ecf20Sopenharmony_ci uint32_t byte_count, 7808c2ecf20Sopenharmony_ci bool tmz) 7818c2ecf20Sopenharmony_ci{ 7828c2ecf20Sopenharmony_ci ib->ptr[ib->length_dw++] = DMA_PACKET(DMA_PACKET_COPY, 7838c2ecf20Sopenharmony_ci 1, 0, 0, byte_count); 7848c2ecf20Sopenharmony_ci ib->ptr[ib->length_dw++] = lower_32_bits(dst_offset); 7858c2ecf20Sopenharmony_ci ib->ptr[ib->length_dw++] = lower_32_bits(src_offset); 7868c2ecf20Sopenharmony_ci ib->ptr[ib->length_dw++] = upper_32_bits(dst_offset) & 0xff; 7878c2ecf20Sopenharmony_ci ib->ptr[ib->length_dw++] = upper_32_bits(src_offset) & 0xff; 7888c2ecf20Sopenharmony_ci} 7898c2ecf20Sopenharmony_ci 7908c2ecf20Sopenharmony_ci/** 7918c2ecf20Sopenharmony_ci * si_dma_emit_fill_buffer - fill buffer using the sDMA engine 7928c2ecf20Sopenharmony_ci * 7938c2ecf20Sopenharmony_ci * @ring: amdgpu_ring structure holding ring information 7948c2ecf20Sopenharmony_ci * @src_data: value to write to buffer 7958c2ecf20Sopenharmony_ci * @dst_offset: dst GPU address 7968c2ecf20Sopenharmony_ci * @byte_count: number of bytes to xfer 7978c2ecf20Sopenharmony_ci * 7988c2ecf20Sopenharmony_ci * Fill GPU buffers using the DMA engine (VI). 7998c2ecf20Sopenharmony_ci */ 8008c2ecf20Sopenharmony_cistatic void si_dma_emit_fill_buffer(struct amdgpu_ib *ib, 8018c2ecf20Sopenharmony_ci uint32_t src_data, 8028c2ecf20Sopenharmony_ci uint64_t dst_offset, 8038c2ecf20Sopenharmony_ci uint32_t byte_count) 8048c2ecf20Sopenharmony_ci{ 8058c2ecf20Sopenharmony_ci ib->ptr[ib->length_dw++] = DMA_PACKET(DMA_PACKET_CONSTANT_FILL, 8068c2ecf20Sopenharmony_ci 0, 0, 0, byte_count / 4); 8078c2ecf20Sopenharmony_ci ib->ptr[ib->length_dw++] = lower_32_bits(dst_offset); 8088c2ecf20Sopenharmony_ci ib->ptr[ib->length_dw++] = src_data; 8098c2ecf20Sopenharmony_ci ib->ptr[ib->length_dw++] = upper_32_bits(dst_offset) << 16; 8108c2ecf20Sopenharmony_ci} 8118c2ecf20Sopenharmony_ci 8128c2ecf20Sopenharmony_ci 8138c2ecf20Sopenharmony_cistatic const struct amdgpu_buffer_funcs si_dma_buffer_funcs = { 8148c2ecf20Sopenharmony_ci .copy_max_bytes = 0xffff8, 8158c2ecf20Sopenharmony_ci .copy_num_dw = 5, 8168c2ecf20Sopenharmony_ci .emit_copy_buffer = si_dma_emit_copy_buffer, 8178c2ecf20Sopenharmony_ci 8188c2ecf20Sopenharmony_ci .fill_max_bytes = 0xffff8, 8198c2ecf20Sopenharmony_ci .fill_num_dw = 4, 8208c2ecf20Sopenharmony_ci .emit_fill_buffer = si_dma_emit_fill_buffer, 8218c2ecf20Sopenharmony_ci}; 8228c2ecf20Sopenharmony_ci 8238c2ecf20Sopenharmony_cistatic void si_dma_set_buffer_funcs(struct amdgpu_device *adev) 8248c2ecf20Sopenharmony_ci{ 8258c2ecf20Sopenharmony_ci adev->mman.buffer_funcs = &si_dma_buffer_funcs; 8268c2ecf20Sopenharmony_ci adev->mman.buffer_funcs_ring = &adev->sdma.instance[0].ring; 8278c2ecf20Sopenharmony_ci} 8288c2ecf20Sopenharmony_ci 8298c2ecf20Sopenharmony_cistatic const struct amdgpu_vm_pte_funcs si_dma_vm_pte_funcs = { 8308c2ecf20Sopenharmony_ci .copy_pte_num_dw = 5, 8318c2ecf20Sopenharmony_ci .copy_pte = si_dma_vm_copy_pte, 8328c2ecf20Sopenharmony_ci 8338c2ecf20Sopenharmony_ci .write_pte = si_dma_vm_write_pte, 8348c2ecf20Sopenharmony_ci .set_pte_pde = si_dma_vm_set_pte_pde, 8358c2ecf20Sopenharmony_ci}; 8368c2ecf20Sopenharmony_ci 8378c2ecf20Sopenharmony_cistatic void si_dma_set_vm_pte_funcs(struct amdgpu_device *adev) 8388c2ecf20Sopenharmony_ci{ 8398c2ecf20Sopenharmony_ci unsigned i; 8408c2ecf20Sopenharmony_ci 8418c2ecf20Sopenharmony_ci adev->vm_manager.vm_pte_funcs = &si_dma_vm_pte_funcs; 8428c2ecf20Sopenharmony_ci for (i = 0; i < adev->sdma.num_instances; i++) { 8438c2ecf20Sopenharmony_ci adev->vm_manager.vm_pte_scheds[i] = 8448c2ecf20Sopenharmony_ci &adev->sdma.instance[i].ring.sched; 8458c2ecf20Sopenharmony_ci } 8468c2ecf20Sopenharmony_ci adev->vm_manager.vm_pte_num_scheds = adev->sdma.num_instances; 8478c2ecf20Sopenharmony_ci} 8488c2ecf20Sopenharmony_ci 8498c2ecf20Sopenharmony_ciconst struct amdgpu_ip_block_version si_dma_ip_block = 8508c2ecf20Sopenharmony_ci{ 8518c2ecf20Sopenharmony_ci .type = AMD_IP_BLOCK_TYPE_SDMA, 8528c2ecf20Sopenharmony_ci .major = 1, 8538c2ecf20Sopenharmony_ci .minor = 0, 8548c2ecf20Sopenharmony_ci .rev = 0, 8558c2ecf20Sopenharmony_ci .funcs = &si_dma_ip_funcs, 8568c2ecf20Sopenharmony_ci}; 857