/*
 * Copyright 2013 Advanced Micro Devices, Inc.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
 * OTHER DEALINGS IN THE SOFTWARE.
 *
 * Authors: Alex Deucher
 */

#include "radeon.h"
#include "radeon_asic.h"
#include "r600d.h"

u32 r600_gpu_check_soft_reset(struct radeon_device *rdev);

/*
 * DMA
 * Starting with R600, the GPU has an asynchronous
 * DMA engine. The programming model is very similar
 * to the 3D engine (ring buffer, IBs, etc.), but the
 * DMA controller has its own packet format that is
 * different from the PM4 format used by the 3D engine.
 * It supports copying data, writing embedded data,
 * solid fills, and a number of other things. It also
 * has support for tiling/detiling of buffers.
 */

/**
 * r600_dma_get_rptr - get the current read pointer
 *
 * @rdev: radeon_device pointer
 * @ring: radeon ring pointer
 *
 * Get the current rptr from the hardware (r6xx+).
508c2ecf20Sopenharmony_ci */ 518c2ecf20Sopenharmony_ciuint32_t r600_dma_get_rptr(struct radeon_device *rdev, 528c2ecf20Sopenharmony_ci struct radeon_ring *ring) 538c2ecf20Sopenharmony_ci{ 548c2ecf20Sopenharmony_ci u32 rptr; 558c2ecf20Sopenharmony_ci 568c2ecf20Sopenharmony_ci if (rdev->wb.enabled) 578c2ecf20Sopenharmony_ci rptr = rdev->wb.wb[ring->rptr_offs/4]; 588c2ecf20Sopenharmony_ci else 598c2ecf20Sopenharmony_ci rptr = RREG32(DMA_RB_RPTR); 608c2ecf20Sopenharmony_ci 618c2ecf20Sopenharmony_ci return (rptr & 0x3fffc) >> 2; 628c2ecf20Sopenharmony_ci} 638c2ecf20Sopenharmony_ci 648c2ecf20Sopenharmony_ci/** 658c2ecf20Sopenharmony_ci * r600_dma_get_wptr - get the current write pointer 668c2ecf20Sopenharmony_ci * 678c2ecf20Sopenharmony_ci * @rdev: radeon_device pointer 688c2ecf20Sopenharmony_ci * @ring: radeon ring pointer 698c2ecf20Sopenharmony_ci * 708c2ecf20Sopenharmony_ci * Get the current wptr from the hardware (r6xx+). 718c2ecf20Sopenharmony_ci */ 728c2ecf20Sopenharmony_ciuint32_t r600_dma_get_wptr(struct radeon_device *rdev, 738c2ecf20Sopenharmony_ci struct radeon_ring *ring) 748c2ecf20Sopenharmony_ci{ 758c2ecf20Sopenharmony_ci return (RREG32(DMA_RB_WPTR) & 0x3fffc) >> 2; 768c2ecf20Sopenharmony_ci} 778c2ecf20Sopenharmony_ci 788c2ecf20Sopenharmony_ci/** 798c2ecf20Sopenharmony_ci * r600_dma_set_wptr - commit the write pointer 808c2ecf20Sopenharmony_ci * 818c2ecf20Sopenharmony_ci * @rdev: radeon_device pointer 828c2ecf20Sopenharmony_ci * @ring: radeon ring pointer 838c2ecf20Sopenharmony_ci * 848c2ecf20Sopenharmony_ci * Write the wptr back to the hardware (r6xx+). 
858c2ecf20Sopenharmony_ci */ 868c2ecf20Sopenharmony_civoid r600_dma_set_wptr(struct radeon_device *rdev, 878c2ecf20Sopenharmony_ci struct radeon_ring *ring) 888c2ecf20Sopenharmony_ci{ 898c2ecf20Sopenharmony_ci WREG32(DMA_RB_WPTR, (ring->wptr << 2) & 0x3fffc); 908c2ecf20Sopenharmony_ci} 918c2ecf20Sopenharmony_ci 928c2ecf20Sopenharmony_ci/** 938c2ecf20Sopenharmony_ci * r600_dma_stop - stop the async dma engine 948c2ecf20Sopenharmony_ci * 958c2ecf20Sopenharmony_ci * @rdev: radeon_device pointer 968c2ecf20Sopenharmony_ci * 978c2ecf20Sopenharmony_ci * Stop the async dma engine (r6xx-evergreen). 988c2ecf20Sopenharmony_ci */ 998c2ecf20Sopenharmony_civoid r600_dma_stop(struct radeon_device *rdev) 1008c2ecf20Sopenharmony_ci{ 1018c2ecf20Sopenharmony_ci u32 rb_cntl = RREG32(DMA_RB_CNTL); 1028c2ecf20Sopenharmony_ci 1038c2ecf20Sopenharmony_ci if (rdev->asic->copy.copy_ring_index == R600_RING_TYPE_DMA_INDEX) 1048c2ecf20Sopenharmony_ci radeon_ttm_set_active_vram_size(rdev, rdev->mc.visible_vram_size); 1058c2ecf20Sopenharmony_ci 1068c2ecf20Sopenharmony_ci rb_cntl &= ~DMA_RB_ENABLE; 1078c2ecf20Sopenharmony_ci WREG32(DMA_RB_CNTL, rb_cntl); 1088c2ecf20Sopenharmony_ci 1098c2ecf20Sopenharmony_ci rdev->ring[R600_RING_TYPE_DMA_INDEX].ready = false; 1108c2ecf20Sopenharmony_ci} 1118c2ecf20Sopenharmony_ci 1128c2ecf20Sopenharmony_ci/** 1138c2ecf20Sopenharmony_ci * r600_dma_resume - setup and start the async dma engine 1148c2ecf20Sopenharmony_ci * 1158c2ecf20Sopenharmony_ci * @rdev: radeon_device pointer 1168c2ecf20Sopenharmony_ci * 1178c2ecf20Sopenharmony_ci * Set up the DMA ring buffer and enable it. (r6xx-evergreen). 1188c2ecf20Sopenharmony_ci * Returns 0 for success, error for failure. 
1198c2ecf20Sopenharmony_ci */ 1208c2ecf20Sopenharmony_ciint r600_dma_resume(struct radeon_device *rdev) 1218c2ecf20Sopenharmony_ci{ 1228c2ecf20Sopenharmony_ci struct radeon_ring *ring = &rdev->ring[R600_RING_TYPE_DMA_INDEX]; 1238c2ecf20Sopenharmony_ci u32 rb_cntl, dma_cntl, ib_cntl; 1248c2ecf20Sopenharmony_ci u32 rb_bufsz; 1258c2ecf20Sopenharmony_ci int r; 1268c2ecf20Sopenharmony_ci 1278c2ecf20Sopenharmony_ci WREG32(DMA_SEM_INCOMPLETE_TIMER_CNTL, 0); 1288c2ecf20Sopenharmony_ci WREG32(DMA_SEM_WAIT_FAIL_TIMER_CNTL, 0); 1298c2ecf20Sopenharmony_ci 1308c2ecf20Sopenharmony_ci /* Set ring buffer size in dwords */ 1318c2ecf20Sopenharmony_ci rb_bufsz = order_base_2(ring->ring_size / 4); 1328c2ecf20Sopenharmony_ci rb_cntl = rb_bufsz << 1; 1338c2ecf20Sopenharmony_ci#ifdef __BIG_ENDIAN 1348c2ecf20Sopenharmony_ci rb_cntl |= DMA_RB_SWAP_ENABLE | DMA_RPTR_WRITEBACK_SWAP_ENABLE; 1358c2ecf20Sopenharmony_ci#endif 1368c2ecf20Sopenharmony_ci WREG32(DMA_RB_CNTL, rb_cntl); 1378c2ecf20Sopenharmony_ci 1388c2ecf20Sopenharmony_ci /* Initialize the ring buffer's read and write pointers */ 1398c2ecf20Sopenharmony_ci WREG32(DMA_RB_RPTR, 0); 1408c2ecf20Sopenharmony_ci WREG32(DMA_RB_WPTR, 0); 1418c2ecf20Sopenharmony_ci 1428c2ecf20Sopenharmony_ci /* set the wb address whether it's enabled or not */ 1438c2ecf20Sopenharmony_ci WREG32(DMA_RB_RPTR_ADDR_HI, 1448c2ecf20Sopenharmony_ci upper_32_bits(rdev->wb.gpu_addr + R600_WB_DMA_RPTR_OFFSET) & 0xFF); 1458c2ecf20Sopenharmony_ci WREG32(DMA_RB_RPTR_ADDR_LO, 1468c2ecf20Sopenharmony_ci ((rdev->wb.gpu_addr + R600_WB_DMA_RPTR_OFFSET) & 0xFFFFFFFC)); 1478c2ecf20Sopenharmony_ci 1488c2ecf20Sopenharmony_ci if (rdev->wb.enabled) 1498c2ecf20Sopenharmony_ci rb_cntl |= DMA_RPTR_WRITEBACK_ENABLE; 1508c2ecf20Sopenharmony_ci 1518c2ecf20Sopenharmony_ci WREG32(DMA_RB_BASE, ring->gpu_addr >> 8); 1528c2ecf20Sopenharmony_ci 1538c2ecf20Sopenharmony_ci /* enable DMA IBs */ 1548c2ecf20Sopenharmony_ci ib_cntl = DMA_IB_ENABLE; 1558c2ecf20Sopenharmony_ci#ifdef __BIG_ENDIAN 
1568c2ecf20Sopenharmony_ci ib_cntl |= DMA_IB_SWAP_ENABLE; 1578c2ecf20Sopenharmony_ci#endif 1588c2ecf20Sopenharmony_ci WREG32(DMA_IB_CNTL, ib_cntl); 1598c2ecf20Sopenharmony_ci 1608c2ecf20Sopenharmony_ci dma_cntl = RREG32(DMA_CNTL); 1618c2ecf20Sopenharmony_ci dma_cntl &= ~CTXEMPTY_INT_ENABLE; 1628c2ecf20Sopenharmony_ci WREG32(DMA_CNTL, dma_cntl); 1638c2ecf20Sopenharmony_ci 1648c2ecf20Sopenharmony_ci if (rdev->family >= CHIP_RV770) 1658c2ecf20Sopenharmony_ci WREG32(DMA_MODE, 1); 1668c2ecf20Sopenharmony_ci 1678c2ecf20Sopenharmony_ci ring->wptr = 0; 1688c2ecf20Sopenharmony_ci WREG32(DMA_RB_WPTR, ring->wptr << 2); 1698c2ecf20Sopenharmony_ci 1708c2ecf20Sopenharmony_ci WREG32(DMA_RB_CNTL, rb_cntl | DMA_RB_ENABLE); 1718c2ecf20Sopenharmony_ci 1728c2ecf20Sopenharmony_ci ring->ready = true; 1738c2ecf20Sopenharmony_ci 1748c2ecf20Sopenharmony_ci r = radeon_ring_test(rdev, R600_RING_TYPE_DMA_INDEX, ring); 1758c2ecf20Sopenharmony_ci if (r) { 1768c2ecf20Sopenharmony_ci ring->ready = false; 1778c2ecf20Sopenharmony_ci return r; 1788c2ecf20Sopenharmony_ci } 1798c2ecf20Sopenharmony_ci 1808c2ecf20Sopenharmony_ci if (rdev->asic->copy.copy_ring_index == R600_RING_TYPE_DMA_INDEX) 1818c2ecf20Sopenharmony_ci radeon_ttm_set_active_vram_size(rdev, rdev->mc.real_vram_size); 1828c2ecf20Sopenharmony_ci 1838c2ecf20Sopenharmony_ci return 0; 1848c2ecf20Sopenharmony_ci} 1858c2ecf20Sopenharmony_ci 1868c2ecf20Sopenharmony_ci/** 1878c2ecf20Sopenharmony_ci * r600_dma_fini - tear down the async dma engine 1888c2ecf20Sopenharmony_ci * 1898c2ecf20Sopenharmony_ci * @rdev: radeon_device pointer 1908c2ecf20Sopenharmony_ci * 1918c2ecf20Sopenharmony_ci * Stop the async dma engine and free the ring (r6xx-evergreen). 
1928c2ecf20Sopenharmony_ci */ 1938c2ecf20Sopenharmony_civoid r600_dma_fini(struct radeon_device *rdev) 1948c2ecf20Sopenharmony_ci{ 1958c2ecf20Sopenharmony_ci r600_dma_stop(rdev); 1968c2ecf20Sopenharmony_ci radeon_ring_fini(rdev, &rdev->ring[R600_RING_TYPE_DMA_INDEX]); 1978c2ecf20Sopenharmony_ci} 1988c2ecf20Sopenharmony_ci 1998c2ecf20Sopenharmony_ci/** 2008c2ecf20Sopenharmony_ci * r600_dma_is_lockup - Check if the DMA engine is locked up 2018c2ecf20Sopenharmony_ci * 2028c2ecf20Sopenharmony_ci * @rdev: radeon_device pointer 2038c2ecf20Sopenharmony_ci * @ring: radeon_ring structure holding ring information 2048c2ecf20Sopenharmony_ci * 2058c2ecf20Sopenharmony_ci * Check if the async DMA engine is locked up. 2068c2ecf20Sopenharmony_ci * Returns true if the engine appears to be locked up, false if not. 2078c2ecf20Sopenharmony_ci */ 2088c2ecf20Sopenharmony_cibool r600_dma_is_lockup(struct radeon_device *rdev, struct radeon_ring *ring) 2098c2ecf20Sopenharmony_ci{ 2108c2ecf20Sopenharmony_ci u32 reset_mask = r600_gpu_check_soft_reset(rdev); 2118c2ecf20Sopenharmony_ci 2128c2ecf20Sopenharmony_ci if (!(reset_mask & RADEON_RESET_DMA)) { 2138c2ecf20Sopenharmony_ci radeon_ring_lockup_update(rdev, ring); 2148c2ecf20Sopenharmony_ci return false; 2158c2ecf20Sopenharmony_ci } 2168c2ecf20Sopenharmony_ci return radeon_ring_test_lockup(rdev, ring); 2178c2ecf20Sopenharmony_ci} 2188c2ecf20Sopenharmony_ci 2198c2ecf20Sopenharmony_ci 2208c2ecf20Sopenharmony_ci/** 2218c2ecf20Sopenharmony_ci * r600_dma_ring_test - simple async dma engine test 2228c2ecf20Sopenharmony_ci * 2238c2ecf20Sopenharmony_ci * @rdev: radeon_device pointer 2248c2ecf20Sopenharmony_ci * @ring: radeon_ring structure holding ring information 2258c2ecf20Sopenharmony_ci * 2268c2ecf20Sopenharmony_ci * Test the DMA engine by writing using it to write an 2278c2ecf20Sopenharmony_ci * value to memory. (r6xx-SI). 2288c2ecf20Sopenharmony_ci * Returns 0 for success, error for failure. 
2298c2ecf20Sopenharmony_ci */ 2308c2ecf20Sopenharmony_ciint r600_dma_ring_test(struct radeon_device *rdev, 2318c2ecf20Sopenharmony_ci struct radeon_ring *ring) 2328c2ecf20Sopenharmony_ci{ 2338c2ecf20Sopenharmony_ci unsigned i; 2348c2ecf20Sopenharmony_ci int r; 2358c2ecf20Sopenharmony_ci unsigned index; 2368c2ecf20Sopenharmony_ci u32 tmp; 2378c2ecf20Sopenharmony_ci u64 gpu_addr; 2388c2ecf20Sopenharmony_ci 2398c2ecf20Sopenharmony_ci if (ring->idx == R600_RING_TYPE_DMA_INDEX) 2408c2ecf20Sopenharmony_ci index = R600_WB_DMA_RING_TEST_OFFSET; 2418c2ecf20Sopenharmony_ci else 2428c2ecf20Sopenharmony_ci index = CAYMAN_WB_DMA1_RING_TEST_OFFSET; 2438c2ecf20Sopenharmony_ci 2448c2ecf20Sopenharmony_ci gpu_addr = rdev->wb.gpu_addr + index; 2458c2ecf20Sopenharmony_ci 2468c2ecf20Sopenharmony_ci tmp = 0xCAFEDEAD; 2478c2ecf20Sopenharmony_ci rdev->wb.wb[index/4] = cpu_to_le32(tmp); 2488c2ecf20Sopenharmony_ci 2498c2ecf20Sopenharmony_ci r = radeon_ring_lock(rdev, ring, 4); 2508c2ecf20Sopenharmony_ci if (r) { 2518c2ecf20Sopenharmony_ci DRM_ERROR("radeon: dma failed to lock ring %d (%d).\n", ring->idx, r); 2528c2ecf20Sopenharmony_ci return r; 2538c2ecf20Sopenharmony_ci } 2548c2ecf20Sopenharmony_ci radeon_ring_write(ring, DMA_PACKET(DMA_PACKET_WRITE, 0, 0, 1)); 2558c2ecf20Sopenharmony_ci radeon_ring_write(ring, lower_32_bits(gpu_addr)); 2568c2ecf20Sopenharmony_ci radeon_ring_write(ring, upper_32_bits(gpu_addr) & 0xff); 2578c2ecf20Sopenharmony_ci radeon_ring_write(ring, 0xDEADBEEF); 2588c2ecf20Sopenharmony_ci radeon_ring_unlock_commit(rdev, ring, false); 2598c2ecf20Sopenharmony_ci 2608c2ecf20Sopenharmony_ci for (i = 0; i < rdev->usec_timeout; i++) { 2618c2ecf20Sopenharmony_ci tmp = le32_to_cpu(rdev->wb.wb[index/4]); 2628c2ecf20Sopenharmony_ci if (tmp == 0xDEADBEEF) 2638c2ecf20Sopenharmony_ci break; 2648c2ecf20Sopenharmony_ci udelay(1); 2658c2ecf20Sopenharmony_ci } 2668c2ecf20Sopenharmony_ci 2678c2ecf20Sopenharmony_ci if (i < rdev->usec_timeout) { 2688c2ecf20Sopenharmony_ci DRM_INFO("ring 
test on %d succeeded in %d usecs\n", ring->idx, i); 2698c2ecf20Sopenharmony_ci } else { 2708c2ecf20Sopenharmony_ci DRM_ERROR("radeon: ring %d test failed (0x%08X)\n", 2718c2ecf20Sopenharmony_ci ring->idx, tmp); 2728c2ecf20Sopenharmony_ci r = -EINVAL; 2738c2ecf20Sopenharmony_ci } 2748c2ecf20Sopenharmony_ci return r; 2758c2ecf20Sopenharmony_ci} 2768c2ecf20Sopenharmony_ci 2778c2ecf20Sopenharmony_ci/** 2788c2ecf20Sopenharmony_ci * r600_dma_fence_ring_emit - emit a fence on the DMA ring 2798c2ecf20Sopenharmony_ci * 2808c2ecf20Sopenharmony_ci * @rdev: radeon_device pointer 2818c2ecf20Sopenharmony_ci * @fence: radeon fence object 2828c2ecf20Sopenharmony_ci * 2838c2ecf20Sopenharmony_ci * Add a DMA fence packet to the ring to write 2848c2ecf20Sopenharmony_ci * the fence seq number and DMA trap packet to generate 2858c2ecf20Sopenharmony_ci * an interrupt if needed (r6xx-r7xx). 2868c2ecf20Sopenharmony_ci */ 2878c2ecf20Sopenharmony_civoid r600_dma_fence_ring_emit(struct radeon_device *rdev, 2888c2ecf20Sopenharmony_ci struct radeon_fence *fence) 2898c2ecf20Sopenharmony_ci{ 2908c2ecf20Sopenharmony_ci struct radeon_ring *ring = &rdev->ring[fence->ring]; 2918c2ecf20Sopenharmony_ci u64 addr = rdev->fence_drv[fence->ring].gpu_addr; 2928c2ecf20Sopenharmony_ci 2938c2ecf20Sopenharmony_ci /* write the fence */ 2948c2ecf20Sopenharmony_ci radeon_ring_write(ring, DMA_PACKET(DMA_PACKET_FENCE, 0, 0, 0)); 2958c2ecf20Sopenharmony_ci radeon_ring_write(ring, addr & 0xfffffffc); 2968c2ecf20Sopenharmony_ci radeon_ring_write(ring, (upper_32_bits(addr) & 0xff)); 2978c2ecf20Sopenharmony_ci radeon_ring_write(ring, lower_32_bits(fence->seq)); 2988c2ecf20Sopenharmony_ci /* generate an interrupt */ 2998c2ecf20Sopenharmony_ci radeon_ring_write(ring, DMA_PACKET(DMA_PACKET_TRAP, 0, 0, 0)); 3008c2ecf20Sopenharmony_ci} 3018c2ecf20Sopenharmony_ci 3028c2ecf20Sopenharmony_ci/** 3038c2ecf20Sopenharmony_ci * r600_dma_semaphore_ring_emit - emit a semaphore on the dma ring 3048c2ecf20Sopenharmony_ci * 
3058c2ecf20Sopenharmony_ci * @rdev: radeon_device pointer 3068c2ecf20Sopenharmony_ci * @ring: radeon_ring structure holding ring information 3078c2ecf20Sopenharmony_ci * @semaphore: radeon semaphore object 3088c2ecf20Sopenharmony_ci * @emit_wait: wait or signal semaphore 3098c2ecf20Sopenharmony_ci * 3108c2ecf20Sopenharmony_ci * Add a DMA semaphore packet to the ring wait on or signal 3118c2ecf20Sopenharmony_ci * other rings (r6xx-SI). 3128c2ecf20Sopenharmony_ci */ 3138c2ecf20Sopenharmony_cibool r600_dma_semaphore_ring_emit(struct radeon_device *rdev, 3148c2ecf20Sopenharmony_ci struct radeon_ring *ring, 3158c2ecf20Sopenharmony_ci struct radeon_semaphore *semaphore, 3168c2ecf20Sopenharmony_ci bool emit_wait) 3178c2ecf20Sopenharmony_ci{ 3188c2ecf20Sopenharmony_ci u64 addr = semaphore->gpu_addr; 3198c2ecf20Sopenharmony_ci u32 s = emit_wait ? 0 : 1; 3208c2ecf20Sopenharmony_ci 3218c2ecf20Sopenharmony_ci radeon_ring_write(ring, DMA_PACKET(DMA_PACKET_SEMAPHORE, 0, s, 0)); 3228c2ecf20Sopenharmony_ci radeon_ring_write(ring, addr & 0xfffffffc); 3238c2ecf20Sopenharmony_ci radeon_ring_write(ring, upper_32_bits(addr) & 0xff); 3248c2ecf20Sopenharmony_ci 3258c2ecf20Sopenharmony_ci return true; 3268c2ecf20Sopenharmony_ci} 3278c2ecf20Sopenharmony_ci 3288c2ecf20Sopenharmony_ci/** 3298c2ecf20Sopenharmony_ci * r600_dma_ib_test - test an IB on the DMA engine 3308c2ecf20Sopenharmony_ci * 3318c2ecf20Sopenharmony_ci * @rdev: radeon_device pointer 3328c2ecf20Sopenharmony_ci * @ring: radeon_ring structure holding ring information 3338c2ecf20Sopenharmony_ci * 3348c2ecf20Sopenharmony_ci * Test a simple IB in the DMA ring (r6xx-SI). 3358c2ecf20Sopenharmony_ci * Returns 0 on success, error on failure. 
3368c2ecf20Sopenharmony_ci */ 3378c2ecf20Sopenharmony_ciint r600_dma_ib_test(struct radeon_device *rdev, struct radeon_ring *ring) 3388c2ecf20Sopenharmony_ci{ 3398c2ecf20Sopenharmony_ci struct radeon_ib ib; 3408c2ecf20Sopenharmony_ci unsigned i; 3418c2ecf20Sopenharmony_ci unsigned index; 3428c2ecf20Sopenharmony_ci int r; 3438c2ecf20Sopenharmony_ci u32 tmp = 0; 3448c2ecf20Sopenharmony_ci u64 gpu_addr; 3458c2ecf20Sopenharmony_ci 3468c2ecf20Sopenharmony_ci if (ring->idx == R600_RING_TYPE_DMA_INDEX) 3478c2ecf20Sopenharmony_ci index = R600_WB_DMA_RING_TEST_OFFSET; 3488c2ecf20Sopenharmony_ci else 3498c2ecf20Sopenharmony_ci index = CAYMAN_WB_DMA1_RING_TEST_OFFSET; 3508c2ecf20Sopenharmony_ci 3518c2ecf20Sopenharmony_ci gpu_addr = rdev->wb.gpu_addr + index; 3528c2ecf20Sopenharmony_ci 3538c2ecf20Sopenharmony_ci r = radeon_ib_get(rdev, ring->idx, &ib, NULL, 256); 3548c2ecf20Sopenharmony_ci if (r) { 3558c2ecf20Sopenharmony_ci DRM_ERROR("radeon: failed to get ib (%d).\n", r); 3568c2ecf20Sopenharmony_ci return r; 3578c2ecf20Sopenharmony_ci } 3588c2ecf20Sopenharmony_ci 3598c2ecf20Sopenharmony_ci ib.ptr[0] = DMA_PACKET(DMA_PACKET_WRITE, 0, 0, 1); 3608c2ecf20Sopenharmony_ci ib.ptr[1] = lower_32_bits(gpu_addr); 3618c2ecf20Sopenharmony_ci ib.ptr[2] = upper_32_bits(gpu_addr) & 0xff; 3628c2ecf20Sopenharmony_ci ib.ptr[3] = 0xDEADBEEF; 3638c2ecf20Sopenharmony_ci ib.length_dw = 4; 3648c2ecf20Sopenharmony_ci 3658c2ecf20Sopenharmony_ci r = radeon_ib_schedule(rdev, &ib, NULL, false); 3668c2ecf20Sopenharmony_ci if (r) { 3678c2ecf20Sopenharmony_ci radeon_ib_free(rdev, &ib); 3688c2ecf20Sopenharmony_ci DRM_ERROR("radeon: failed to schedule ib (%d).\n", r); 3698c2ecf20Sopenharmony_ci return r; 3708c2ecf20Sopenharmony_ci } 3718c2ecf20Sopenharmony_ci r = radeon_fence_wait_timeout(ib.fence, false, usecs_to_jiffies( 3728c2ecf20Sopenharmony_ci RADEON_USEC_IB_TEST_TIMEOUT)); 3738c2ecf20Sopenharmony_ci if (r < 0) { 3748c2ecf20Sopenharmony_ci DRM_ERROR("radeon: fence wait failed (%d).\n", r); 
3758c2ecf20Sopenharmony_ci return r; 3768c2ecf20Sopenharmony_ci } else if (r == 0) { 3778c2ecf20Sopenharmony_ci DRM_ERROR("radeon: fence wait timed out.\n"); 3788c2ecf20Sopenharmony_ci return -ETIMEDOUT; 3798c2ecf20Sopenharmony_ci } 3808c2ecf20Sopenharmony_ci r = 0; 3818c2ecf20Sopenharmony_ci for (i = 0; i < rdev->usec_timeout; i++) { 3828c2ecf20Sopenharmony_ci tmp = le32_to_cpu(rdev->wb.wb[index/4]); 3838c2ecf20Sopenharmony_ci if (tmp == 0xDEADBEEF) 3848c2ecf20Sopenharmony_ci break; 3858c2ecf20Sopenharmony_ci udelay(1); 3868c2ecf20Sopenharmony_ci } 3878c2ecf20Sopenharmony_ci if (i < rdev->usec_timeout) { 3888c2ecf20Sopenharmony_ci DRM_INFO("ib test on ring %d succeeded in %u usecs\n", ib.fence->ring, i); 3898c2ecf20Sopenharmony_ci } else { 3908c2ecf20Sopenharmony_ci DRM_ERROR("radeon: ib test failed (0x%08X)\n", tmp); 3918c2ecf20Sopenharmony_ci r = -EINVAL; 3928c2ecf20Sopenharmony_ci } 3938c2ecf20Sopenharmony_ci radeon_ib_free(rdev, &ib); 3948c2ecf20Sopenharmony_ci return r; 3958c2ecf20Sopenharmony_ci} 3968c2ecf20Sopenharmony_ci 3978c2ecf20Sopenharmony_ci/** 3988c2ecf20Sopenharmony_ci * r600_dma_ring_ib_execute - Schedule an IB on the DMA engine 3998c2ecf20Sopenharmony_ci * 4008c2ecf20Sopenharmony_ci * @rdev: radeon_device pointer 4018c2ecf20Sopenharmony_ci * @ib: IB object to schedule 4028c2ecf20Sopenharmony_ci * 4038c2ecf20Sopenharmony_ci * Schedule an IB in the DMA ring (r6xx-r7xx). 
4048c2ecf20Sopenharmony_ci */ 4058c2ecf20Sopenharmony_civoid r600_dma_ring_ib_execute(struct radeon_device *rdev, struct radeon_ib *ib) 4068c2ecf20Sopenharmony_ci{ 4078c2ecf20Sopenharmony_ci struct radeon_ring *ring = &rdev->ring[ib->ring]; 4088c2ecf20Sopenharmony_ci 4098c2ecf20Sopenharmony_ci if (rdev->wb.enabled) { 4108c2ecf20Sopenharmony_ci u32 next_rptr = ring->wptr + 4; 4118c2ecf20Sopenharmony_ci while ((next_rptr & 7) != 5) 4128c2ecf20Sopenharmony_ci next_rptr++; 4138c2ecf20Sopenharmony_ci next_rptr += 3; 4148c2ecf20Sopenharmony_ci radeon_ring_write(ring, DMA_PACKET(DMA_PACKET_WRITE, 0, 0, 1)); 4158c2ecf20Sopenharmony_ci radeon_ring_write(ring, ring->next_rptr_gpu_addr & 0xfffffffc); 4168c2ecf20Sopenharmony_ci radeon_ring_write(ring, upper_32_bits(ring->next_rptr_gpu_addr) & 0xff); 4178c2ecf20Sopenharmony_ci radeon_ring_write(ring, next_rptr); 4188c2ecf20Sopenharmony_ci } 4198c2ecf20Sopenharmony_ci 4208c2ecf20Sopenharmony_ci /* The indirect buffer packet must end on an 8 DW boundary in the DMA ring. 4218c2ecf20Sopenharmony_ci * Pad as necessary with NOPs. 
4228c2ecf20Sopenharmony_ci */ 4238c2ecf20Sopenharmony_ci while ((ring->wptr & 7) != 5) 4248c2ecf20Sopenharmony_ci radeon_ring_write(ring, DMA_PACKET(DMA_PACKET_NOP, 0, 0, 0)); 4258c2ecf20Sopenharmony_ci radeon_ring_write(ring, DMA_PACKET(DMA_PACKET_INDIRECT_BUFFER, 0, 0, 0)); 4268c2ecf20Sopenharmony_ci radeon_ring_write(ring, (ib->gpu_addr & 0xFFFFFFE0)); 4278c2ecf20Sopenharmony_ci radeon_ring_write(ring, (ib->length_dw << 16) | (upper_32_bits(ib->gpu_addr) & 0xFF)); 4288c2ecf20Sopenharmony_ci 4298c2ecf20Sopenharmony_ci} 4308c2ecf20Sopenharmony_ci 4318c2ecf20Sopenharmony_ci/** 4328c2ecf20Sopenharmony_ci * r600_copy_dma - copy pages using the DMA engine 4338c2ecf20Sopenharmony_ci * 4348c2ecf20Sopenharmony_ci * @rdev: radeon_device pointer 4358c2ecf20Sopenharmony_ci * @src_offset: src GPU address 4368c2ecf20Sopenharmony_ci * @dst_offset: dst GPU address 4378c2ecf20Sopenharmony_ci * @num_gpu_pages: number of GPU pages to xfer 4388c2ecf20Sopenharmony_ci * @resv: reservation object to sync to 4398c2ecf20Sopenharmony_ci * 4408c2ecf20Sopenharmony_ci * Copy GPU paging using the DMA engine (r6xx). 4418c2ecf20Sopenharmony_ci * Used by the radeon ttm implementation to move pages if 4428c2ecf20Sopenharmony_ci * registered as the asic copy callback. 
4438c2ecf20Sopenharmony_ci */ 4448c2ecf20Sopenharmony_cistruct radeon_fence *r600_copy_dma(struct radeon_device *rdev, 4458c2ecf20Sopenharmony_ci uint64_t src_offset, uint64_t dst_offset, 4468c2ecf20Sopenharmony_ci unsigned num_gpu_pages, 4478c2ecf20Sopenharmony_ci struct dma_resv *resv) 4488c2ecf20Sopenharmony_ci{ 4498c2ecf20Sopenharmony_ci struct radeon_fence *fence; 4508c2ecf20Sopenharmony_ci struct radeon_sync sync; 4518c2ecf20Sopenharmony_ci int ring_index = rdev->asic->copy.dma_ring_index; 4528c2ecf20Sopenharmony_ci struct radeon_ring *ring = &rdev->ring[ring_index]; 4538c2ecf20Sopenharmony_ci u32 size_in_dw, cur_size_in_dw; 4548c2ecf20Sopenharmony_ci int i, num_loops; 4558c2ecf20Sopenharmony_ci int r = 0; 4568c2ecf20Sopenharmony_ci 4578c2ecf20Sopenharmony_ci radeon_sync_create(&sync); 4588c2ecf20Sopenharmony_ci 4598c2ecf20Sopenharmony_ci size_in_dw = (num_gpu_pages << RADEON_GPU_PAGE_SHIFT) / 4; 4608c2ecf20Sopenharmony_ci num_loops = DIV_ROUND_UP(size_in_dw, 0xFFFE); 4618c2ecf20Sopenharmony_ci r = radeon_ring_lock(rdev, ring, num_loops * 4 + 8); 4628c2ecf20Sopenharmony_ci if (r) { 4638c2ecf20Sopenharmony_ci DRM_ERROR("radeon: moving bo (%d).\n", r); 4648c2ecf20Sopenharmony_ci radeon_sync_free(rdev, &sync, NULL); 4658c2ecf20Sopenharmony_ci return ERR_PTR(r); 4668c2ecf20Sopenharmony_ci } 4678c2ecf20Sopenharmony_ci 4688c2ecf20Sopenharmony_ci radeon_sync_resv(rdev, &sync, resv, false); 4698c2ecf20Sopenharmony_ci radeon_sync_rings(rdev, &sync, ring->idx); 4708c2ecf20Sopenharmony_ci 4718c2ecf20Sopenharmony_ci for (i = 0; i < num_loops; i++) { 4728c2ecf20Sopenharmony_ci cur_size_in_dw = size_in_dw; 4738c2ecf20Sopenharmony_ci if (cur_size_in_dw > 0xFFFE) 4748c2ecf20Sopenharmony_ci cur_size_in_dw = 0xFFFE; 4758c2ecf20Sopenharmony_ci size_in_dw -= cur_size_in_dw; 4768c2ecf20Sopenharmony_ci radeon_ring_write(ring, DMA_PACKET(DMA_PACKET_COPY, 0, 0, cur_size_in_dw)); 4778c2ecf20Sopenharmony_ci radeon_ring_write(ring, dst_offset & 0xfffffffc); 4788c2ecf20Sopenharmony_ci 
radeon_ring_write(ring, src_offset & 0xfffffffc); 4798c2ecf20Sopenharmony_ci radeon_ring_write(ring, (((upper_32_bits(dst_offset) & 0xff) << 16) | 4808c2ecf20Sopenharmony_ci (upper_32_bits(src_offset) & 0xff))); 4818c2ecf20Sopenharmony_ci src_offset += cur_size_in_dw * 4; 4828c2ecf20Sopenharmony_ci dst_offset += cur_size_in_dw * 4; 4838c2ecf20Sopenharmony_ci } 4848c2ecf20Sopenharmony_ci 4858c2ecf20Sopenharmony_ci r = radeon_fence_emit(rdev, &fence, ring->idx); 4868c2ecf20Sopenharmony_ci if (r) { 4878c2ecf20Sopenharmony_ci radeon_ring_unlock_undo(rdev, ring); 4888c2ecf20Sopenharmony_ci radeon_sync_free(rdev, &sync, NULL); 4898c2ecf20Sopenharmony_ci return ERR_PTR(r); 4908c2ecf20Sopenharmony_ci } 4918c2ecf20Sopenharmony_ci 4928c2ecf20Sopenharmony_ci radeon_ring_unlock_commit(rdev, ring, false); 4938c2ecf20Sopenharmony_ci radeon_sync_free(rdev, &sync, fence); 4948c2ecf20Sopenharmony_ci 4958c2ecf20Sopenharmony_ci return fence; 4968c2ecf20Sopenharmony_ci} 497