18c2ecf20Sopenharmony_ci/* 28c2ecf20Sopenharmony_ci * Copyright 2010 Advanced Micro Devices, Inc. 38c2ecf20Sopenharmony_ci * 48c2ecf20Sopenharmony_ci * Permission is hereby granted, free of charge, to any person obtaining a 58c2ecf20Sopenharmony_ci * copy of this software and associated documentation files (the "Software"), 68c2ecf20Sopenharmony_ci * to deal in the Software without restriction, including without limitation 78c2ecf20Sopenharmony_ci * the rights to use, copy, modify, merge, publish, distribute, sublicense, 88c2ecf20Sopenharmony_ci * and/or sell copies of the Software, and to permit persons to whom the 98c2ecf20Sopenharmony_ci * Software is furnished to do so, subject to the following conditions: 108c2ecf20Sopenharmony_ci * 118c2ecf20Sopenharmony_ci * The above copyright notice and this permission notice shall be included in 128c2ecf20Sopenharmony_ci * all copies or substantial portions of the Software. 138c2ecf20Sopenharmony_ci * 148c2ecf20Sopenharmony_ci * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 158c2ecf20Sopenharmony_ci * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 168c2ecf20Sopenharmony_ci * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 178c2ecf20Sopenharmony_ci * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR 188c2ecf20Sopenharmony_ci * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, 198c2ecf20Sopenharmony_ci * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR 208c2ecf20Sopenharmony_ci * OTHER DEALINGS IN THE SOFTWARE. 218c2ecf20Sopenharmony_ci * 228c2ecf20Sopenharmony_ci * Authors: Alex Deucher 238c2ecf20Sopenharmony_ci */ 248c2ecf20Sopenharmony_ci 258c2ecf20Sopenharmony_ci#include "radeon.h" 268c2ecf20Sopenharmony_ci#include "radeon_asic.h" 278c2ecf20Sopenharmony_ci#include "radeon_trace.h" 288c2ecf20Sopenharmony_ci#include "nid.h" 298c2ecf20Sopenharmony_ci 308c2ecf20Sopenharmony_ciu32 cayman_gpu_check_soft_reset(struct radeon_device *rdev); 318c2ecf20Sopenharmony_ci 328c2ecf20Sopenharmony_ci/* 338c2ecf20Sopenharmony_ci * DMA 348c2ecf20Sopenharmony_ci * Starting with R600, the GPU has an asynchronous 358c2ecf20Sopenharmony_ci * DMA engine. The programming model is very similar 368c2ecf20Sopenharmony_ci * to the 3D engine (ring buffer, IBs, etc.), but the 378c2ecf20Sopenharmony_ci * DMA controller has it's own packet format that is 388c2ecf20Sopenharmony_ci * different form the PM4 format used by the 3D engine. 398c2ecf20Sopenharmony_ci * It supports copying data, writing embedded data, 408c2ecf20Sopenharmony_ci * solid fills, and a number of other things. It also 418c2ecf20Sopenharmony_ci * has support for tiling/detiling of buffers. 428c2ecf20Sopenharmony_ci * Cayman and newer support two asynchronous DMA engines. 438c2ecf20Sopenharmony_ci */ 448c2ecf20Sopenharmony_ci 458c2ecf20Sopenharmony_ci/** 468c2ecf20Sopenharmony_ci * cayman_dma_get_rptr - get the current read pointer 478c2ecf20Sopenharmony_ci * 488c2ecf20Sopenharmony_ci * @rdev: radeon_device pointer 498c2ecf20Sopenharmony_ci * @ring: radeon ring pointer 508c2ecf20Sopenharmony_ci * 518c2ecf20Sopenharmony_ci * Get the current rptr from the hardware (cayman+). 528c2ecf20Sopenharmony_ci */ 538c2ecf20Sopenharmony_ciuint32_t cayman_dma_get_rptr(struct radeon_device *rdev, 548c2ecf20Sopenharmony_ci struct radeon_ring *ring) 558c2ecf20Sopenharmony_ci{ 568c2ecf20Sopenharmony_ci u32 rptr, reg; 578c2ecf20Sopenharmony_ci 588c2ecf20Sopenharmony_ci if (rdev->wb.enabled) { 598c2ecf20Sopenharmony_ci rptr = rdev->wb.wb[ring->rptr_offs/4]; 608c2ecf20Sopenharmony_ci } else { 618c2ecf20Sopenharmony_ci if (ring->idx == R600_RING_TYPE_DMA_INDEX) 628c2ecf20Sopenharmony_ci reg = DMA_RB_RPTR + DMA0_REGISTER_OFFSET; 638c2ecf20Sopenharmony_ci else 648c2ecf20Sopenharmony_ci reg = DMA_RB_RPTR + DMA1_REGISTER_OFFSET; 658c2ecf20Sopenharmony_ci 668c2ecf20Sopenharmony_ci rptr = RREG32(reg); 678c2ecf20Sopenharmony_ci } 688c2ecf20Sopenharmony_ci 698c2ecf20Sopenharmony_ci return (rptr & 0x3fffc) >> 2; 708c2ecf20Sopenharmony_ci} 718c2ecf20Sopenharmony_ci 728c2ecf20Sopenharmony_ci/** 738c2ecf20Sopenharmony_ci * cayman_dma_get_wptr - get the current write pointer 748c2ecf20Sopenharmony_ci * 758c2ecf20Sopenharmony_ci * @rdev: radeon_device pointer 768c2ecf20Sopenharmony_ci * @ring: radeon ring pointer 778c2ecf20Sopenharmony_ci * 788c2ecf20Sopenharmony_ci * Get the current wptr from the hardware (cayman+). 798c2ecf20Sopenharmony_ci */ 808c2ecf20Sopenharmony_ciuint32_t cayman_dma_get_wptr(struct radeon_device *rdev, 818c2ecf20Sopenharmony_ci struct radeon_ring *ring) 828c2ecf20Sopenharmony_ci{ 838c2ecf20Sopenharmony_ci u32 reg; 848c2ecf20Sopenharmony_ci 858c2ecf20Sopenharmony_ci if (ring->idx == R600_RING_TYPE_DMA_INDEX) 868c2ecf20Sopenharmony_ci reg = DMA_RB_WPTR + DMA0_REGISTER_OFFSET; 878c2ecf20Sopenharmony_ci else 888c2ecf20Sopenharmony_ci reg = DMA_RB_WPTR + DMA1_REGISTER_OFFSET; 898c2ecf20Sopenharmony_ci 908c2ecf20Sopenharmony_ci return (RREG32(reg) & 0x3fffc) >> 2; 918c2ecf20Sopenharmony_ci} 928c2ecf20Sopenharmony_ci 938c2ecf20Sopenharmony_ci/** 948c2ecf20Sopenharmony_ci * cayman_dma_set_wptr - commit the write pointer 958c2ecf20Sopenharmony_ci * 968c2ecf20Sopenharmony_ci * @rdev: radeon_device pointer 978c2ecf20Sopenharmony_ci * @ring: radeon ring pointer 988c2ecf20Sopenharmony_ci * 998c2ecf20Sopenharmony_ci * Write the wptr back to the hardware (cayman+). 1008c2ecf20Sopenharmony_ci */ 1018c2ecf20Sopenharmony_civoid cayman_dma_set_wptr(struct radeon_device *rdev, 1028c2ecf20Sopenharmony_ci struct radeon_ring *ring) 1038c2ecf20Sopenharmony_ci{ 1048c2ecf20Sopenharmony_ci u32 reg; 1058c2ecf20Sopenharmony_ci 1068c2ecf20Sopenharmony_ci if (ring->idx == R600_RING_TYPE_DMA_INDEX) 1078c2ecf20Sopenharmony_ci reg = DMA_RB_WPTR + DMA0_REGISTER_OFFSET; 1088c2ecf20Sopenharmony_ci else 1098c2ecf20Sopenharmony_ci reg = DMA_RB_WPTR + DMA1_REGISTER_OFFSET; 1108c2ecf20Sopenharmony_ci 1118c2ecf20Sopenharmony_ci WREG32(reg, (ring->wptr << 2) & 0x3fffc); 1128c2ecf20Sopenharmony_ci} 1138c2ecf20Sopenharmony_ci 1148c2ecf20Sopenharmony_ci/** 1158c2ecf20Sopenharmony_ci * cayman_dma_ring_ib_execute - Schedule an IB on the DMA engine 1168c2ecf20Sopenharmony_ci * 1178c2ecf20Sopenharmony_ci * @rdev: radeon_device pointer 1188c2ecf20Sopenharmony_ci * @ib: IB object to schedule 1198c2ecf20Sopenharmony_ci * 1208c2ecf20Sopenharmony_ci * Schedule an IB in the DMA ring (cayman-SI). 1218c2ecf20Sopenharmony_ci */ 1228c2ecf20Sopenharmony_civoid cayman_dma_ring_ib_execute(struct radeon_device *rdev, 1238c2ecf20Sopenharmony_ci struct radeon_ib *ib) 1248c2ecf20Sopenharmony_ci{ 1258c2ecf20Sopenharmony_ci struct radeon_ring *ring = &rdev->ring[ib->ring]; 1268c2ecf20Sopenharmony_ci unsigned vm_id = ib->vm ? ib->vm->ids[ib->ring].id : 0; 1278c2ecf20Sopenharmony_ci 1288c2ecf20Sopenharmony_ci if (rdev->wb.enabled) { 1298c2ecf20Sopenharmony_ci u32 next_rptr = ring->wptr + 4; 1308c2ecf20Sopenharmony_ci while ((next_rptr & 7) != 5) 1318c2ecf20Sopenharmony_ci next_rptr++; 1328c2ecf20Sopenharmony_ci next_rptr += 3; 1338c2ecf20Sopenharmony_ci radeon_ring_write(ring, DMA_PACKET(DMA_PACKET_WRITE, 0, 0, 1)); 1348c2ecf20Sopenharmony_ci radeon_ring_write(ring, ring->next_rptr_gpu_addr & 0xfffffffc); 1358c2ecf20Sopenharmony_ci radeon_ring_write(ring, upper_32_bits(ring->next_rptr_gpu_addr) & 0xff); 1368c2ecf20Sopenharmony_ci radeon_ring_write(ring, next_rptr); 1378c2ecf20Sopenharmony_ci } 1388c2ecf20Sopenharmony_ci 1398c2ecf20Sopenharmony_ci /* The indirect buffer packet must end on an 8 DW boundary in the DMA ring. 1408c2ecf20Sopenharmony_ci * Pad as necessary with NOPs. 1418c2ecf20Sopenharmony_ci */ 1428c2ecf20Sopenharmony_ci while ((ring->wptr & 7) != 5) 1438c2ecf20Sopenharmony_ci radeon_ring_write(ring, DMA_PACKET(DMA_PACKET_NOP, 0, 0, 0)); 1448c2ecf20Sopenharmony_ci radeon_ring_write(ring, DMA_IB_PACKET(DMA_PACKET_INDIRECT_BUFFER, vm_id, 0)); 1458c2ecf20Sopenharmony_ci radeon_ring_write(ring, (ib->gpu_addr & 0xFFFFFFE0)); 1468c2ecf20Sopenharmony_ci radeon_ring_write(ring, (ib->length_dw << 12) | (upper_32_bits(ib->gpu_addr) & 0xFF)); 1478c2ecf20Sopenharmony_ci 1488c2ecf20Sopenharmony_ci} 1498c2ecf20Sopenharmony_ci 1508c2ecf20Sopenharmony_ci/** 1518c2ecf20Sopenharmony_ci * cayman_dma_stop - stop the async dma engines 1528c2ecf20Sopenharmony_ci * 1538c2ecf20Sopenharmony_ci * @rdev: radeon_device pointer 1548c2ecf20Sopenharmony_ci * 1558c2ecf20Sopenharmony_ci * Stop the async dma engines (cayman-SI). 1568c2ecf20Sopenharmony_ci */ 1578c2ecf20Sopenharmony_civoid cayman_dma_stop(struct radeon_device *rdev) 1588c2ecf20Sopenharmony_ci{ 1598c2ecf20Sopenharmony_ci u32 rb_cntl; 1608c2ecf20Sopenharmony_ci 1618c2ecf20Sopenharmony_ci if ((rdev->asic->copy.copy_ring_index == R600_RING_TYPE_DMA_INDEX) || 1628c2ecf20Sopenharmony_ci (rdev->asic->copy.copy_ring_index == CAYMAN_RING_TYPE_DMA1_INDEX)) 1638c2ecf20Sopenharmony_ci radeon_ttm_set_active_vram_size(rdev, rdev->mc.visible_vram_size); 1648c2ecf20Sopenharmony_ci 1658c2ecf20Sopenharmony_ci /* dma0 */ 1668c2ecf20Sopenharmony_ci rb_cntl = RREG32(DMA_RB_CNTL + DMA0_REGISTER_OFFSET); 1678c2ecf20Sopenharmony_ci rb_cntl &= ~DMA_RB_ENABLE; 1688c2ecf20Sopenharmony_ci WREG32(DMA_RB_CNTL + DMA0_REGISTER_OFFSET, rb_cntl); 1698c2ecf20Sopenharmony_ci 1708c2ecf20Sopenharmony_ci /* dma1 */ 1718c2ecf20Sopenharmony_ci rb_cntl = RREG32(DMA_RB_CNTL + DMA1_REGISTER_OFFSET); 1728c2ecf20Sopenharmony_ci rb_cntl &= ~DMA_RB_ENABLE; 1738c2ecf20Sopenharmony_ci WREG32(DMA_RB_CNTL + DMA1_REGISTER_OFFSET, rb_cntl); 1748c2ecf20Sopenharmony_ci 1758c2ecf20Sopenharmony_ci rdev->ring[R600_RING_TYPE_DMA_INDEX].ready = false; 1768c2ecf20Sopenharmony_ci rdev->ring[CAYMAN_RING_TYPE_DMA1_INDEX].ready = false; 1778c2ecf20Sopenharmony_ci} 1788c2ecf20Sopenharmony_ci 1798c2ecf20Sopenharmony_ci/** 1808c2ecf20Sopenharmony_ci * cayman_dma_resume - setup and start the async dma engines 1818c2ecf20Sopenharmony_ci * 1828c2ecf20Sopenharmony_ci * @rdev: radeon_device pointer 1838c2ecf20Sopenharmony_ci * 1848c2ecf20Sopenharmony_ci * Set up the DMA ring buffers and enable them. (cayman-SI). 1858c2ecf20Sopenharmony_ci * Returns 0 for success, error for failure. 1868c2ecf20Sopenharmony_ci */ 1878c2ecf20Sopenharmony_ciint cayman_dma_resume(struct radeon_device *rdev) 1888c2ecf20Sopenharmony_ci{ 1898c2ecf20Sopenharmony_ci struct radeon_ring *ring; 1908c2ecf20Sopenharmony_ci u32 rb_cntl, dma_cntl, ib_cntl; 1918c2ecf20Sopenharmony_ci u32 rb_bufsz; 1928c2ecf20Sopenharmony_ci u32 reg_offset, wb_offset; 1938c2ecf20Sopenharmony_ci int i, r; 1948c2ecf20Sopenharmony_ci 1958c2ecf20Sopenharmony_ci for (i = 0; i < 2; i++) { 1968c2ecf20Sopenharmony_ci if (i == 0) { 1978c2ecf20Sopenharmony_ci ring = &rdev->ring[R600_RING_TYPE_DMA_INDEX]; 1988c2ecf20Sopenharmony_ci reg_offset = DMA0_REGISTER_OFFSET; 1998c2ecf20Sopenharmony_ci wb_offset = R600_WB_DMA_RPTR_OFFSET; 2008c2ecf20Sopenharmony_ci } else { 2018c2ecf20Sopenharmony_ci ring = &rdev->ring[CAYMAN_RING_TYPE_DMA1_INDEX]; 2028c2ecf20Sopenharmony_ci reg_offset = DMA1_REGISTER_OFFSET; 2038c2ecf20Sopenharmony_ci wb_offset = CAYMAN_WB_DMA1_RPTR_OFFSET; 2048c2ecf20Sopenharmony_ci } 2058c2ecf20Sopenharmony_ci 2068c2ecf20Sopenharmony_ci WREG32(DMA_SEM_INCOMPLETE_TIMER_CNTL + reg_offset, 0); 2078c2ecf20Sopenharmony_ci WREG32(DMA_SEM_WAIT_FAIL_TIMER_CNTL + reg_offset, 0); 2088c2ecf20Sopenharmony_ci 2098c2ecf20Sopenharmony_ci /* Set ring buffer size in dwords */ 2108c2ecf20Sopenharmony_ci rb_bufsz = order_base_2(ring->ring_size / 4); 2118c2ecf20Sopenharmony_ci rb_cntl = rb_bufsz << 1; 2128c2ecf20Sopenharmony_ci#ifdef __BIG_ENDIAN 2138c2ecf20Sopenharmony_ci rb_cntl |= DMA_RB_SWAP_ENABLE | DMA_RPTR_WRITEBACK_SWAP_ENABLE; 2148c2ecf20Sopenharmony_ci#endif 2158c2ecf20Sopenharmony_ci WREG32(DMA_RB_CNTL + reg_offset, rb_cntl); 2168c2ecf20Sopenharmony_ci 2178c2ecf20Sopenharmony_ci /* Initialize the ring buffer's read and write pointers */ 2188c2ecf20Sopenharmony_ci WREG32(DMA_RB_RPTR + reg_offset, 0); 2198c2ecf20Sopenharmony_ci WREG32(DMA_RB_WPTR + reg_offset, 0); 2208c2ecf20Sopenharmony_ci 2218c2ecf20Sopenharmony_ci /* set the wb address whether it's enabled or not */ 2228c2ecf20Sopenharmony_ci WREG32(DMA_RB_RPTR_ADDR_HI + reg_offset, 2238c2ecf20Sopenharmony_ci upper_32_bits(rdev->wb.gpu_addr + wb_offset) & 0xFF); 2248c2ecf20Sopenharmony_ci WREG32(DMA_RB_RPTR_ADDR_LO + reg_offset, 2258c2ecf20Sopenharmony_ci ((rdev->wb.gpu_addr + wb_offset) & 0xFFFFFFFC)); 2268c2ecf20Sopenharmony_ci 2278c2ecf20Sopenharmony_ci if (rdev->wb.enabled) 2288c2ecf20Sopenharmony_ci rb_cntl |= DMA_RPTR_WRITEBACK_ENABLE; 2298c2ecf20Sopenharmony_ci 2308c2ecf20Sopenharmony_ci WREG32(DMA_RB_BASE + reg_offset, ring->gpu_addr >> 8); 2318c2ecf20Sopenharmony_ci 2328c2ecf20Sopenharmony_ci /* enable DMA IBs */ 2338c2ecf20Sopenharmony_ci ib_cntl = DMA_IB_ENABLE | CMD_VMID_FORCE; 2348c2ecf20Sopenharmony_ci#ifdef __BIG_ENDIAN 2358c2ecf20Sopenharmony_ci ib_cntl |= DMA_IB_SWAP_ENABLE; 2368c2ecf20Sopenharmony_ci#endif 2378c2ecf20Sopenharmony_ci WREG32(DMA_IB_CNTL + reg_offset, ib_cntl); 2388c2ecf20Sopenharmony_ci 2398c2ecf20Sopenharmony_ci dma_cntl = RREG32(DMA_CNTL + reg_offset); 2408c2ecf20Sopenharmony_ci dma_cntl &= ~CTXEMPTY_INT_ENABLE; 2418c2ecf20Sopenharmony_ci WREG32(DMA_CNTL + reg_offset, dma_cntl); 2428c2ecf20Sopenharmony_ci 2438c2ecf20Sopenharmony_ci ring->wptr = 0; 2448c2ecf20Sopenharmony_ci WREG32(DMA_RB_WPTR + reg_offset, ring->wptr << 2); 2458c2ecf20Sopenharmony_ci 2468c2ecf20Sopenharmony_ci WREG32(DMA_RB_CNTL + reg_offset, rb_cntl | DMA_RB_ENABLE); 2478c2ecf20Sopenharmony_ci 2488c2ecf20Sopenharmony_ci ring->ready = true; 2498c2ecf20Sopenharmony_ci 2508c2ecf20Sopenharmony_ci r = radeon_ring_test(rdev, ring->idx, ring); 2518c2ecf20Sopenharmony_ci if (r) { 2528c2ecf20Sopenharmony_ci ring->ready = false; 2538c2ecf20Sopenharmony_ci return r; 2548c2ecf20Sopenharmony_ci } 2558c2ecf20Sopenharmony_ci } 2568c2ecf20Sopenharmony_ci 2578c2ecf20Sopenharmony_ci if ((rdev->asic->copy.copy_ring_index == R600_RING_TYPE_DMA_INDEX) || 2588c2ecf20Sopenharmony_ci (rdev->asic->copy.copy_ring_index == CAYMAN_RING_TYPE_DMA1_INDEX)) 2598c2ecf20Sopenharmony_ci radeon_ttm_set_active_vram_size(rdev, rdev->mc.real_vram_size); 2608c2ecf20Sopenharmony_ci 2618c2ecf20Sopenharmony_ci return 0; 2628c2ecf20Sopenharmony_ci} 2638c2ecf20Sopenharmony_ci 2648c2ecf20Sopenharmony_ci/** 2658c2ecf20Sopenharmony_ci * cayman_dma_fini - tear down the async dma engines 2668c2ecf20Sopenharmony_ci * 2678c2ecf20Sopenharmony_ci * @rdev: radeon_device pointer 2688c2ecf20Sopenharmony_ci * 2698c2ecf20Sopenharmony_ci * Stop the async dma engines and free the rings (cayman-SI). 2708c2ecf20Sopenharmony_ci */ 2718c2ecf20Sopenharmony_civoid cayman_dma_fini(struct radeon_device *rdev) 2728c2ecf20Sopenharmony_ci{ 2738c2ecf20Sopenharmony_ci cayman_dma_stop(rdev); 2748c2ecf20Sopenharmony_ci radeon_ring_fini(rdev, &rdev->ring[R600_RING_TYPE_DMA_INDEX]); 2758c2ecf20Sopenharmony_ci radeon_ring_fini(rdev, &rdev->ring[CAYMAN_RING_TYPE_DMA1_INDEX]); 2768c2ecf20Sopenharmony_ci} 2778c2ecf20Sopenharmony_ci 2788c2ecf20Sopenharmony_ci/** 2798c2ecf20Sopenharmony_ci * cayman_dma_is_lockup - Check if the DMA engine is locked up 2808c2ecf20Sopenharmony_ci * 2818c2ecf20Sopenharmony_ci * @rdev: radeon_device pointer 2828c2ecf20Sopenharmony_ci * @ring: radeon_ring structure holding ring information 2838c2ecf20Sopenharmony_ci * 2848c2ecf20Sopenharmony_ci * Check if the async DMA engine is locked up. 2858c2ecf20Sopenharmony_ci * Returns true if the engine appears to be locked up, false if not. 2868c2ecf20Sopenharmony_ci */ 2878c2ecf20Sopenharmony_cibool cayman_dma_is_lockup(struct radeon_device *rdev, struct radeon_ring *ring) 2888c2ecf20Sopenharmony_ci{ 2898c2ecf20Sopenharmony_ci u32 reset_mask = cayman_gpu_check_soft_reset(rdev); 2908c2ecf20Sopenharmony_ci u32 mask; 2918c2ecf20Sopenharmony_ci 2928c2ecf20Sopenharmony_ci if (ring->idx == R600_RING_TYPE_DMA_INDEX) 2938c2ecf20Sopenharmony_ci mask = RADEON_RESET_DMA; 2948c2ecf20Sopenharmony_ci else 2958c2ecf20Sopenharmony_ci mask = RADEON_RESET_DMA1; 2968c2ecf20Sopenharmony_ci 2978c2ecf20Sopenharmony_ci if (!(reset_mask & mask)) { 2988c2ecf20Sopenharmony_ci radeon_ring_lockup_update(rdev, ring); 2998c2ecf20Sopenharmony_ci return false; 3008c2ecf20Sopenharmony_ci } 3018c2ecf20Sopenharmony_ci return radeon_ring_test_lockup(rdev, ring); 3028c2ecf20Sopenharmony_ci} 3038c2ecf20Sopenharmony_ci 3048c2ecf20Sopenharmony_ci/** 3058c2ecf20Sopenharmony_ci * cayman_dma_vm_copy_pages - update PTEs by copying them from the GART 3068c2ecf20Sopenharmony_ci * 3078c2ecf20Sopenharmony_ci * @rdev: radeon_device pointer 3088c2ecf20Sopenharmony_ci * @ib: indirect buffer to fill with commands 3098c2ecf20Sopenharmony_ci * @pe: addr of the page entry 3108c2ecf20Sopenharmony_ci * @src: src addr where to copy from 3118c2ecf20Sopenharmony_ci * @count: number of page entries to update 3128c2ecf20Sopenharmony_ci * 3138c2ecf20Sopenharmony_ci * Update PTEs by copying them from the GART using the DMA (cayman/TN). 3148c2ecf20Sopenharmony_ci */ 3158c2ecf20Sopenharmony_civoid cayman_dma_vm_copy_pages(struct radeon_device *rdev, 3168c2ecf20Sopenharmony_ci struct radeon_ib *ib, 3178c2ecf20Sopenharmony_ci uint64_t pe, uint64_t src, 3188c2ecf20Sopenharmony_ci unsigned count) 3198c2ecf20Sopenharmony_ci{ 3208c2ecf20Sopenharmony_ci unsigned ndw; 3218c2ecf20Sopenharmony_ci 3228c2ecf20Sopenharmony_ci while (count) { 3238c2ecf20Sopenharmony_ci ndw = count * 2; 3248c2ecf20Sopenharmony_ci if (ndw > 0xFFFFE) 3258c2ecf20Sopenharmony_ci ndw = 0xFFFFE; 3268c2ecf20Sopenharmony_ci 3278c2ecf20Sopenharmony_ci ib->ptr[ib->length_dw++] = DMA_PACKET(DMA_PACKET_COPY, 3288c2ecf20Sopenharmony_ci 0, 0, ndw); 3298c2ecf20Sopenharmony_ci ib->ptr[ib->length_dw++] = lower_32_bits(pe); 3308c2ecf20Sopenharmony_ci ib->ptr[ib->length_dw++] = lower_32_bits(src); 3318c2ecf20Sopenharmony_ci ib->ptr[ib->length_dw++] = upper_32_bits(pe) & 0xff; 3328c2ecf20Sopenharmony_ci ib->ptr[ib->length_dw++] = upper_32_bits(src) & 0xff; 3338c2ecf20Sopenharmony_ci 3348c2ecf20Sopenharmony_ci pe += ndw * 4; 3358c2ecf20Sopenharmony_ci src += ndw * 4; 3368c2ecf20Sopenharmony_ci count -= ndw / 2; 3378c2ecf20Sopenharmony_ci } 3388c2ecf20Sopenharmony_ci} 3398c2ecf20Sopenharmony_ci 3408c2ecf20Sopenharmony_ci/** 3418c2ecf20Sopenharmony_ci * cayman_dma_vm_write_pages - update PTEs by writing them manually 3428c2ecf20Sopenharmony_ci * 3438c2ecf20Sopenharmony_ci * @rdev: radeon_device pointer 3448c2ecf20Sopenharmony_ci * @ib: indirect buffer to fill with commands 3458c2ecf20Sopenharmony_ci * @pe: addr of the page entry 3468c2ecf20Sopenharmony_ci * @addr: dst addr to write into pe 3478c2ecf20Sopenharmony_ci * @count: number of page entries to update 3488c2ecf20Sopenharmony_ci * @incr: increase next addr by incr bytes 3498c2ecf20Sopenharmony_ci * @flags: hw access flags 3508c2ecf20Sopenharmony_ci * 3518c2ecf20Sopenharmony_ci * Update PTEs by writing them manually using the DMA (cayman/TN). 3528c2ecf20Sopenharmony_ci */ 3538c2ecf20Sopenharmony_civoid cayman_dma_vm_write_pages(struct radeon_device *rdev, 3548c2ecf20Sopenharmony_ci struct radeon_ib *ib, 3558c2ecf20Sopenharmony_ci uint64_t pe, 3568c2ecf20Sopenharmony_ci uint64_t addr, unsigned count, 3578c2ecf20Sopenharmony_ci uint32_t incr, uint32_t flags) 3588c2ecf20Sopenharmony_ci{ 3598c2ecf20Sopenharmony_ci uint64_t value; 3608c2ecf20Sopenharmony_ci unsigned ndw; 3618c2ecf20Sopenharmony_ci 3628c2ecf20Sopenharmony_ci while (count) { 3638c2ecf20Sopenharmony_ci ndw = count * 2; 3648c2ecf20Sopenharmony_ci if (ndw > 0xFFFFE) 3658c2ecf20Sopenharmony_ci ndw = 0xFFFFE; 3668c2ecf20Sopenharmony_ci 3678c2ecf20Sopenharmony_ci /* for non-physically contiguous pages (system) */ 3688c2ecf20Sopenharmony_ci ib->ptr[ib->length_dw++] = DMA_PACKET(DMA_PACKET_WRITE, 3698c2ecf20Sopenharmony_ci 0, 0, ndw); 3708c2ecf20Sopenharmony_ci ib->ptr[ib->length_dw++] = pe; 3718c2ecf20Sopenharmony_ci ib->ptr[ib->length_dw++] = upper_32_bits(pe) & 0xff; 3728c2ecf20Sopenharmony_ci for (; ndw > 0; ndw -= 2, --count, pe += 8) { 3738c2ecf20Sopenharmony_ci if (flags & R600_PTE_SYSTEM) { 3748c2ecf20Sopenharmony_ci value = radeon_vm_map_gart(rdev, addr); 3758c2ecf20Sopenharmony_ci } else if (flags & R600_PTE_VALID) { 3768c2ecf20Sopenharmony_ci value = addr; 3778c2ecf20Sopenharmony_ci } else { 3788c2ecf20Sopenharmony_ci value = 0; 3798c2ecf20Sopenharmony_ci } 3808c2ecf20Sopenharmony_ci addr += incr; 3818c2ecf20Sopenharmony_ci value |= flags; 3828c2ecf20Sopenharmony_ci ib->ptr[ib->length_dw++] = value; 3838c2ecf20Sopenharmony_ci ib->ptr[ib->length_dw++] = upper_32_bits(value); 3848c2ecf20Sopenharmony_ci } 3858c2ecf20Sopenharmony_ci } 3868c2ecf20Sopenharmony_ci} 3878c2ecf20Sopenharmony_ci 3888c2ecf20Sopenharmony_ci/** 3898c2ecf20Sopenharmony_ci * cayman_dma_vm_set_pages - update the page tables using the DMA 3908c2ecf20Sopenharmony_ci * 3918c2ecf20Sopenharmony_ci * @rdev: radeon_device pointer 3928c2ecf20Sopenharmony_ci * @ib: indirect buffer to fill with commands 3938c2ecf20Sopenharmony_ci * @pe: addr of the page entry 3948c2ecf20Sopenharmony_ci * @addr: dst addr to write into pe 3958c2ecf20Sopenharmony_ci * @count: number of page entries to update 3968c2ecf20Sopenharmony_ci * @incr: increase next addr by incr bytes 3978c2ecf20Sopenharmony_ci * @flags: hw access flags 3988c2ecf20Sopenharmony_ci * 3998c2ecf20Sopenharmony_ci * Update the page tables using the DMA (cayman/TN). 4008c2ecf20Sopenharmony_ci */ 4018c2ecf20Sopenharmony_civoid cayman_dma_vm_set_pages(struct radeon_device *rdev, 4028c2ecf20Sopenharmony_ci struct radeon_ib *ib, 4038c2ecf20Sopenharmony_ci uint64_t pe, 4048c2ecf20Sopenharmony_ci uint64_t addr, unsigned count, 4058c2ecf20Sopenharmony_ci uint32_t incr, uint32_t flags) 4068c2ecf20Sopenharmony_ci{ 4078c2ecf20Sopenharmony_ci uint64_t value; 4088c2ecf20Sopenharmony_ci unsigned ndw; 4098c2ecf20Sopenharmony_ci 4108c2ecf20Sopenharmony_ci while (count) { 4118c2ecf20Sopenharmony_ci ndw = count * 2; 4128c2ecf20Sopenharmony_ci if (ndw > 0xFFFFE) 4138c2ecf20Sopenharmony_ci ndw = 0xFFFFE; 4148c2ecf20Sopenharmony_ci 4158c2ecf20Sopenharmony_ci if (flags & R600_PTE_VALID) 4168c2ecf20Sopenharmony_ci value = addr; 4178c2ecf20Sopenharmony_ci else 4188c2ecf20Sopenharmony_ci value = 0; 4198c2ecf20Sopenharmony_ci 4208c2ecf20Sopenharmony_ci /* for physically contiguous pages (vram) */ 4218c2ecf20Sopenharmony_ci ib->ptr[ib->length_dw++] = DMA_PTE_PDE_PACKET(ndw); 4228c2ecf20Sopenharmony_ci ib->ptr[ib->length_dw++] = pe; /* dst addr */ 4238c2ecf20Sopenharmony_ci ib->ptr[ib->length_dw++] = upper_32_bits(pe) & 0xff; 4248c2ecf20Sopenharmony_ci ib->ptr[ib->length_dw++] = flags; /* mask */ 4258c2ecf20Sopenharmony_ci ib->ptr[ib->length_dw++] = 0; 4268c2ecf20Sopenharmony_ci ib->ptr[ib->length_dw++] = value; /* value */ 4278c2ecf20Sopenharmony_ci ib->ptr[ib->length_dw++] = upper_32_bits(value); 4288c2ecf20Sopenharmony_ci ib->ptr[ib->length_dw++] = incr; /* increment size */ 4298c2ecf20Sopenharmony_ci ib->ptr[ib->length_dw++] = 0; 4308c2ecf20Sopenharmony_ci 4318c2ecf20Sopenharmony_ci pe += ndw * 4; 4328c2ecf20Sopenharmony_ci addr += (ndw / 2) * incr; 4338c2ecf20Sopenharmony_ci count -= ndw / 2; 4348c2ecf20Sopenharmony_ci } 4358c2ecf20Sopenharmony_ci} 4368c2ecf20Sopenharmony_ci 4378c2ecf20Sopenharmony_ci/** 4388c2ecf20Sopenharmony_ci * cayman_dma_vm_pad_ib - pad the IB to the required number of dw 4398c2ecf20Sopenharmony_ci * 4408c2ecf20Sopenharmony_ci * @ib: indirect buffer to fill with padding 4418c2ecf20Sopenharmony_ci * 4428c2ecf20Sopenharmony_ci */ 4438c2ecf20Sopenharmony_civoid cayman_dma_vm_pad_ib(struct radeon_ib *ib) 4448c2ecf20Sopenharmony_ci{ 4458c2ecf20Sopenharmony_ci while (ib->length_dw & 0x7) 4468c2ecf20Sopenharmony_ci ib->ptr[ib->length_dw++] = DMA_PACKET(DMA_PACKET_NOP, 0, 0, 0); 4478c2ecf20Sopenharmony_ci} 4488c2ecf20Sopenharmony_ci 4498c2ecf20Sopenharmony_civoid cayman_dma_vm_flush(struct radeon_device *rdev, struct radeon_ring *ring, 4508c2ecf20Sopenharmony_ci unsigned vm_id, uint64_t pd_addr) 4518c2ecf20Sopenharmony_ci{ 4528c2ecf20Sopenharmony_ci radeon_ring_write(ring, DMA_PACKET(DMA_PACKET_SRBM_WRITE, 0, 0, 0)); 4538c2ecf20Sopenharmony_ci radeon_ring_write(ring, (0xf << 16) | ((VM_CONTEXT0_PAGE_TABLE_BASE_ADDR + (vm_id << 2)) >> 2)); 4548c2ecf20Sopenharmony_ci radeon_ring_write(ring, pd_addr >> 12); 4558c2ecf20Sopenharmony_ci 4568c2ecf20Sopenharmony_ci /* flush hdp cache */ 4578c2ecf20Sopenharmony_ci radeon_ring_write(ring, DMA_PACKET(DMA_PACKET_SRBM_WRITE, 0, 0, 0)); 4588c2ecf20Sopenharmony_ci radeon_ring_write(ring, (0xf << 16) | (HDP_MEM_COHERENCY_FLUSH_CNTL >> 2)); 4598c2ecf20Sopenharmony_ci radeon_ring_write(ring, 1); 4608c2ecf20Sopenharmony_ci 4618c2ecf20Sopenharmony_ci /* bits 0-7 are the VM contexts0-7 */ 4628c2ecf20Sopenharmony_ci radeon_ring_write(ring, DMA_PACKET(DMA_PACKET_SRBM_WRITE, 0, 0, 0)); 4638c2ecf20Sopenharmony_ci radeon_ring_write(ring, (0xf << 16) | (VM_INVALIDATE_REQUEST >> 2)); 4648c2ecf20Sopenharmony_ci radeon_ring_write(ring, 1 << vm_id); 4658c2ecf20Sopenharmony_ci 4668c2ecf20Sopenharmony_ci /* wait for invalidate to complete */ 4678c2ecf20Sopenharmony_ci radeon_ring_write(ring, DMA_SRBM_READ_PACKET); 4688c2ecf20Sopenharmony_ci radeon_ring_write(ring, (0xff << 20) | (VM_INVALIDATE_REQUEST >> 2)); 4698c2ecf20Sopenharmony_ci radeon_ring_write(ring, 0); /* mask */ 4708c2ecf20Sopenharmony_ci radeon_ring_write(ring, 0); /* value */ 4718c2ecf20Sopenharmony_ci} 4728c2ecf20Sopenharmony_ci 473